diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 352acee23df2d..cf604822d6eea 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -278,7 +278,7 @@ Please try to maintain backward compatibility. *pandas* has lots of users with l Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue. -Like many packages, *pandas* uses the [Nose testing system](http://nose.readthedocs.org/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html). +Like many packages, *pandas* uses the [Nose testing system](https://nose.readthedocs.io/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html). #### Writing tests @@ -323,7 +323,7 @@ Performance matters and it is worth considering whether your code has introduced > > The asv benchmark suite was translated from the previous framework, vbench, so many stylistic issues are likely a result of automated transformation of the code. -To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](http://asv.readthedocs.org/en/latest/installing.html). +To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](https://asv.readthedocs.io/en/latest/installing.html). To install asv: @@ -360,7 +360,7 @@ This command is equivalent to: This will launch every test only once, display stderr from the benchmarks, and use your local `python` that comes from your `$PATH`. -Information on how to write a benchmark can be found in the [asv documentation](http://asv.readthedocs.org/en/latest/writing_benchmarks.html). +Information on how to write a benchmark can be found in the [asv documentation](https://asv.readthedocs.io/en/latest/writing_benchmarks.html). #### Running the vbench performance test suite (phasing out) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 6eac7b4831f0f..9807639143ddb 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -15,6 +15,14 @@ def setup(self): self.int = pd.Int64Index(np.arange(N).repeat(5)) self.float = pd.Float64Index(np.random.randn(N).repeat(5)) + # Convenience naming. 
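A note on what is being named here: `pd.core.nanops._checked_add_with_arr` is the internal overflow-checked int64 addition helper whose implementation appears later in this diff. A rough sketch of its contract, using only public NumPy; the function name below is ours, purely for illustration (`np.broadcast_to` needs NumPy >= 1.10, and the in-tree version also handles older NumPy):

```python
import numpy as np

def checked_add_sketch(arr, b):
    # Behaves like ``arr + b`` on int64 data, but raises instead of
    # silently wrapping around on overflow.
    b2 = np.broadcast_to(b, arr.shape)
    info = np.iinfo(np.int64)
    pos, neg = b2 > 0, b2 < 0
    # A positive addend overflows when arr > max - b; a negative one
    # underflows when arr < min - b.
    if (arr[pos] > info.max - b2[pos]).any() or \
            (arr[neg] < info.min - b2[neg]).any():
        raise OverflowError("Overflow in int64 addition")
    return arr + b

checked_add_sketch(np.arange(10, dtype=np.int64), 1)  # fine
try:
    checked_add_sketch(np.array([np.iinfo(np.int64).max]), 1)
except OverflowError as e:
    print(e)  # Overflow in int64 addition
```
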
+ self.checked_add = pd.core.nanops._checked_add_with_arr + + self.arr = np.arange(1000000) + self.arrpos = np.arange(1000000) + self.arrneg = np.arange(-1000000, 0) + self.arrmixed = np.array([1, -1]).repeat(500000) + def time_int_factorize(self): self.int.factorize() @@ -29,3 +37,21 @@ def time_int_duplicated(self): def time_float_duplicated(self): self.float.duplicated() + + def time_add_overflow_pos_scalar(self): + self.checked_add(self.arr, 1) + + def time_add_overflow_neg_scalar(self): + self.checked_add(self.arr, -1) + + def time_add_overflow_zero_scalar(self): + self.checked_add(self.arr, 0) + + def time_add_overflow_pos_arr(self): + self.checked_add(self.arr, self.arrpos) + + def time_add_overflow_neg_arr(self): + self.checked_add(self.arr, self.arrneg) + + def time_add_overflow_mixed_arr(self): + self.checked_add(self.arr, self.arrmixed) diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py index 2b10cb88a3134..de9aa18937985 100644 --- a/asv_bench/benchmarks/attrs_caching.py +++ b/asv_bench/benchmarks/attrs_caching.py @@ -20,4 +20,4 @@ def setup(self): self.cur_index = self.df.index def time_setattr_dataframe_index(self): - self.df.index = self.cur_index \ No newline at end of file + self.df.index = self.cur_index diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index 265ffbc7261ca..f68cf9399c546 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -49,4 +49,4 @@ def setup(self): self.s = Series(([Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')] * 1000)) def time_index_from_series_ctor(self): - Index(self.s) \ No newline at end of file + Index(self.s) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 85f3c1628bd8b..6f40611e68531 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -1703,4 +1703,4 @@ def setup(self): self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values] def time_series_ctor_from_dict(self): - Series(self.some_dict) \ No newline at end of file + Series(self.some_dict) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index e12b00dd06b39..5f3671012e6d5 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -548,6 +548,32 @@ def time_groupby_sum(self): self.df.groupby(['a'])['b'].sum() +class groupby_period(object): + # GH 14338 + goal_time = 0.2 + + def make_grouper(self, N): + return pd.period_range('1900-01-01', freq='D', periods=N) + + def setup(self): + N = 10000 + self.grouper = self.make_grouper(N) + self.df = pd.DataFrame(np.random.randn(N, 2)) + + def time_groupby_sum(self): + self.df.groupby(self.grouper).sum() + + +class groupby_datetime(groupby_period): + def make_grouper(self, N): + return pd.date_range('1900-01-01', freq='D', periods=N) + + +class groupby_datetimetz(groupby_period): + def make_grouper(self, N): + return pd.date_range('1900-01-01', freq='D', periods=N, + tz='US/Central') + #---------------------------------------------------------------------- # Series.value_counts diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py index 7638cc2a0f8df..659fc4941da54 100644 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ b/asv_bench/benchmarks/hdfstore_bench.py @@ -348,4 +348,4 @@ def remove(self, f): try: os.remove(self.f) except: - pass \ No newline at end of file + pass diff --git a/asv_bench/benchmarks/index_object.py 
b/asv_bench/benchmarks/index_object.py index a0a1b560d36f3..2c94f9b2b1e8c 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -344,4 +344,4 @@ def setup(self): self.mi = MultiIndex.from_product([self.level1, self.level2]) def time_multiindex_with_datetime_level_sliced(self): - self.mi[:10].values \ No newline at end of file + self.mi[:10].values diff --git a/asv_bench/benchmarks/io_sql.py b/asv_bench/benchmarks/io_sql.py index 9a6b21f9e067a..c583ac1768c90 100644 --- a/asv_bench/benchmarks/io_sql.py +++ b/asv_bench/benchmarks/io_sql.py @@ -212,4 +212,4 @@ def setup(self): self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) def time_sql_write_sqlalchemy(self): - self.df.to_sql('test1', self.engine, if_exists='replace') \ No newline at end of file + self.df.to_sql('test1', self.engine, if_exists='replace') diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py index 3f80c4c0c6338..5419571c75b43 100644 --- a/asv_bench/benchmarks/packers.py +++ b/asv_bench/benchmarks/packers.py @@ -547,6 +547,31 @@ def remove(self, f): pass +class packers_write_json_lines(object): + goal_time = 0.2 + + def setup(self): + self.f = '__test__.msg' + self.N = 100000 + self.C = 5 + self.index = date_range('20000101', periods=self.N, freq='H') + self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) + self.remove(self.f) + self.df.index = np.arange(self.N) + + def time_packers_write_json_lines(self): + self.df.to_json(self.f, orient="records", lines=True) + + def teardown(self): + self.remove(self.f) + + def remove(self, f): + try: + os.remove(self.f) + except: + pass + + class packers_write_json_T(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index 0b0e73847aa96..4f6fd4a5a2df8 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -61,4 +61,4 @@ def setup(self): self.data_frames[x] = self.df def time_panel_from_dict_two_different_indexes(self): - Panel.from_dict(self.data_frames) \ No newline at end of file + Panel.from_dict(self.data_frames) diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index 90118eaf6e407..0bd572db2211a 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -53,4 +53,4 @@ def setup(self): self.panel = Panel(np.random.randn(100, len(self.index), 1000)) def time_panel_shift_minor(self): - self.panel.shift(1, axis='minor') \ No newline at end of file + self.panel.shift(1, axis='minor') diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index e9f33ebfce0bd..869ddd8d6fa49 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -45,4 +45,4 @@ def setup(self): self.ts = Series(np.random.randn(self.N), index=self.rng) def time_replace_replacena(self): - self.ts.replace(np.nan, 0.0, inplace=True) \ No newline at end of file + self.ts.replace(np.nan, 0.0, inplace=True) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 604fa5092a231..ab235e085986c 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -73,4 +73,4 @@ def setup(self): break def time_unstack_sparse_keyspace(self): - self.idf.unstack() \ No newline at end of file + 
self.idf.unstack() diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index daf5135e64c40..12fbb2478c2a5 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -258,4 +258,4 @@ def time_rolling_skew(self): rolling_skew(self.arr, self.win) def time_rolling_kurt(self): - rolling_kurt(self.arr, self.win) \ No newline at end of file + rolling_kurt(self.arr, self.win) diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index e4f91b1b9c0c6..d64606214ca6a 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -390,4 +390,4 @@ def time_strings_upper(self): self.many.str.upper() def make_series(self, letters, strlen, size): - return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) \ No newline at end of file + return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 9719fd87dfb2e..8470525dd01fa 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,5 +1,5 @@ from .pandas_vb_common import * -from pandas import to_timedelta +from pandas import to_timedelta, Timestamp class timedelta_convert_int(object): @@ -47,3 +47,14 @@ def time_timedelta_convert_coerce(self): def time_timedelta_convert_ignore(self): to_timedelta(self.arr, errors='ignore') + + +class timedelta_add_overflow(object): + goal_time = 0.2 + + def setup(self): + self.td = to_timedelta(np.arange(1000000)) + self.ts = Timestamp('2000') + + def time_add_td_ts(self): + self.td + self.ts diff --git a/ci/prep_cython_cache.sh b/ci/prep_cython_cache.sh index 6f16dce2fb431..cadc356b641f9 100755 --- a/ci/prep_cython_cache.sh +++ b/ci/prep_cython_cache.sh @@ -3,8 +3,8 @@ ls "$HOME/.cache/" PYX_CACHE_DIR="$HOME/.cache/pyxfiles" -pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx"` -pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx"` +pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd"` +pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx" -o -name "*.pxd"` CACHE_File="$HOME/.cache/cython_files.tar" diff --git a/ci/submit_cython_cache.sh b/ci/submit_cython_cache.sh index 4f60df0ccb2d8..5c98c3df61736 100755 --- a/ci/submit_cython_cache.sh +++ b/ci/submit_cython_cache.sh @@ -2,7 +2,7 @@ CACHE_File="$HOME/.cache/cython_files.tar" PYX_CACHE_DIR="$HOME/.cache/pyxfiles" -pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx"` +pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd"` rm -rf $CACHE_File rm -rf $PYX_CACHE_DIR diff --git a/doc/README.rst b/doc/README.rst index a93ad32a4c8f8..a3733846d9ed1 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -155,9 +155,9 @@ Where to start? --------------- There are a number of issues listed under `Docs -`_ +`_ and `Good as first PR -`_ +`_ where you could start out. Or maybe you have an idea of your own, by using pandas, looking for something diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst index e38a9f22f9d99..a2f0eb5e068c4 100644 --- a/doc/_templates/autosummary/accessor_attribute.rst +++ b/doc/_templates/autosummary/accessor_attribute.rst @@ -3,4 +3,4 @@ .. currentmodule:: {{ module.split('.')[0] }} -.. autoaccessorattribute:: {{ [module.split('.')[1], objname]|join('.') }} \ No newline at end of file +..
autoaccessorattribute:: {{ [module.split('.')[1], objname]|join('.') }} diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst index 8175d8615ceb2..43dfc3b813120 100644 --- a/doc/_templates/autosummary/accessor_method.rst +++ b/doc/_templates/autosummary/accessor_method.rst @@ -3,4 +3,4 @@ .. currentmodule:: {{ module.split('.')[0] }} -.. autoaccessormethod:: {{ [module.split('.')[1], objname]|join('.') }} \ No newline at end of file +.. autoaccessormethod:: {{ [module.split('.')[1], objname]|join('.') }} diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 19318aad3d53d..e5aa6b577270a 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1794,18 +1794,18 @@ The following functions are available for one dimensional object arrays or scala - :meth:`~pandas.to_datetime` (conversion to datetime objects) - .. ipython:: python + .. ipython:: python - import datetime - m = ['2016-07-09', datetime.datetime(2016, 3, 2)] - pd.to_datetime(m) + import datetime + m = ['2016-07-09', datetime.datetime(2016, 3, 2)] + pd.to_datetime(m) - :meth:`~pandas.to_timedelta` (conversion to timedelta objects) - .. ipython:: python + .. ipython:: python - m = ['5us', pd.Timedelta('1day')] - pd.to_timedelta(m) + m = ['5us', pd.Timedelta('1day')] + pd.to_timedelta(m) To force a conversion, we can pass in an ``errors`` argument, which specifies how pandas should deal with elements that cannot be converted to desired dtype or object. By default, ``errors='raise'``, meaning that any errors encountered diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index f52f72b49dd31..090998570a358 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -973,7 +973,7 @@ are not numeric data (even in the case that ``.categories`` is numeric). print("TypeError: " + str(e)) .. note:: - If such a function works, please file a bug at https://github.com/pydata/pandas! + If such a function works, please file a bug at https://github.com/pandas-dev/pandas! dtype in apply ~~~~~~~~~~~~~~ diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 85d432b546f21..7ec91d251f15d 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -116,7 +116,7 @@ Reading External Data Like SAS, pandas provides utilities for reading in data from many formats. The ``tips`` dataset, found within the pandas -tests (`csv `_) +tests (`csv `_) will be used in many of the following examples. SAS provides ``PROC IMPORT`` to read csv data into a data set. @@ -131,7 +131,7 @@ The pandas method is :func:`read_csv`, which works similarly. .. ipython:: python - url = 'https://raw.github.com/pydata/pandas/master/pandas/tests/data/tips.csv' + url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv' tips = pd.read_csv(url) tips.head() diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst index 099a0e9469058..7962e0e69faa1 100644 --- a/doc/source/comparison_with_sql.rst +++ b/doc/source/comparison_with_sql.rst @@ -23,7 +23,7 @@ structure. .. 
ipython:: python - url = 'https://raw.github.com/pydata/pandas/master/pandas/tests/data/tips.csv' + url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv' tips = pd.read_csv(url) tips.head() diff --git a/doc/source/conf.py b/doc/source/conf.py index fd3a2493a53e8..4f679f3f728bf 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -295,15 +295,15 @@ 'python': ('http://docs.python.org/3', None), 'numpy': ('http://docs.scipy.org/doc/numpy', None), 'scipy': ('http://docs.scipy.org/doc/scipy/reference', None), - 'py': ('http://pylib.readthedocs.org/en/latest/', None) + 'py': ('https://pylib.readthedocs.io/en/latest/', None) } import glob autosummary_generate = glob.glob("*.rst") # extlinks alias -extlinks = {'issue': ('https://github.com/pydata/pandas/issues/%s', +extlinks = {'issue': ('https://github.com/pandas-dev/pandas/issues/%s', 'GH'), - 'wiki': ('https://github.com/pydata/pandas/wiki/%s', + 'wiki': ('https://github.com/pandas-dev/pandas/wiki/%s', 'wiki ')} ipython_exec_lines = [ @@ -468,10 +468,10 @@ def linkcode_resolve(domain, info): fn = os.path.relpath(fn, start=os.path.dirname(pandas.__file__)) if '+' in pandas.__version__: - return "http://github.com/pydata/pandas/blob/master/pandas/%s%s" % ( + return "http://github.com/pandas-dev/pandas/blob/master/pandas/%s%s" % ( fn, linespec) else: - return "http://github.com/pydata/pandas/blob/v%s/pandas/%s%s" % ( + return "http://github.com/pandas-dev/pandas/blob/v%s/pandas/%s%s" % ( pandas.__version__, fn, linespec) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 7f336abcaa6d7..a8a47a9d979c0 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -14,11 +14,11 @@ All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. If you are simply looking to start working with the *pandas* codebase, navigate to the -`GitHub "issues" tab `_ and start looking through +`GitHub "issues" tab `_ and start looking through interesting issues. There are a number of issues listed under `Docs -`_ +`_ and `Difficulty Novice -`_ +`_ where you could start out. Or maybe through using *pandas* you have an idea of your own or are looking for something @@ -27,7 +27,7 @@ about it! Feel free to ask questions on the `mailing list `_ or on `Gitter -`_. +`_. Bug reports and enhancement requests ==================================== @@ -79,7 +79,7 @@ It can very quickly become overwhelming, but sticking to the guidelines below wi straightforward and mostly trouble free. As always, if you are having difficulties please feel free to ask for help. -The code is hosted on `GitHub `_. To +The code is hosted on `GitHub `_. To contribute you will need to sign up for a `free GitHub account `_. We use `Git `_ for version control to allow many people to work together on the project. @@ -103,12 +103,12 @@ Forking ------- You will need your own fork to work on the code. Go to the `pandas project -page `_ and hit the ``Fork`` button. You will +page `_ and hit the ``Fork`` button. 
You will want to clone your fork to your machine:: git clone git@github.com:your-user-name/pandas.git pandas-yourname cd pandas-yourname - git remote add upstream git://github.com/pydata/pandas.git + git remote add upstream git://github.com/pandas-dev/pandas.git This creates the directory `pandas-yourname` and connects your repository to the upstream (main project) *pandas* repository. @@ -360,7 +360,7 @@ follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of numpydoc is included in the *pandas* source code. `nbconvert `_ and -`nbformat `_ are required to build +`nbformat `_ are required to build the Jupyter notebooks included in the documentation. If you have a conda environment named ``pandas_dev``, you can install the extra @@ -467,7 +467,7 @@ and make these changes with:: pep8radius master --diff --in-place Additional standards are outlined on the `code style wiki -page `_. +page `_. Please try to maintain backward compatibility. *pandas* has lots of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, @@ -490,7 +490,7 @@ Adding tests is one of the most common requests after code is pushed to *pandas* it is worth getting in the habit of writing tests ahead of time so this is never an issue. Like many packages, *pandas* uses the `Nose testing system -`_ and the convenient +`_ and the convenient extensions in `numpy.testing `_. @@ -501,7 +501,7 @@ All tests should go into the ``tests`` subdirectory of the specific package. This folder contains many current examples of tests, and we suggest looking to these for inspiration. If your test requires working with files or network connectivity, there is more information on the `testing page -`_ of the wiki. +`_ of the wiki. The ``pandas.util.testing`` module has many special ``assert`` functions that make it easier to make statements about whether Series or DataFrame objects are @@ -569,7 +569,7 @@ supports both python2 and python3. To use all features of asv, you will need either ``conda`` or ``virtualenv``. For more details please check the `asv installation -webpage `_. +webpage `_. To install asv:: @@ -624,7 +624,7 @@ This will display stderr from the benchmarks, and use your local ``python`` that comes from your ``$PATH``. Information on how to write a benchmark and how to use asv can be found in the -`asv documentation `_. +`asv documentation `_. .. _contributing.gbq_integration_tests: @@ -639,7 +639,7 @@ on Travis-CI. The first step is to create a `service account Integration tests for ``pandas.io.gbq`` are skipped in pull requests because the credentials that are required for running Google BigQuery integration tests are `encrypted `__ -on Travis-CI and are only accessible from the pydata/pandas repository. The +on Travis-CI and are only accessible from the pandas-dev/pandas repository. The credentials won't be available on forks of pandas. Here are the steps to run gbq integration tests on a forked repository: @@ -688,7 +688,7 @@ performance regressions. You can run specific benchmarks using the ``-r`` flag, which takes a regular expression. -See the `performance testing wiki `_ for information +See the `performance testing wiki `_ for information on how to write a benchmark. Documenting your code @@ -712,8 +712,8 @@ directive is used. The sphinx syntax for that is: This will put the text *New in version 0.17.0* wherever you put the sphinx directive. 
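For instance, a minimal sketch of the docstring placement described next; the method name and keyword argument are invented for illustration:

```python
def my_new_method(self, new_option=True):
    """
    Do something useful.

    .. versionadded:: 0.17.0

    Parameters
    ----------
    new_option : bool, default True
        Control the new behaviour.

        .. versionadded:: 0.17.0
    """
```
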
This should also be put in the docstring when adding a new function -or method (`example `__) -or a new keyword argument (`example `__). +or method (`example `__) +or a new keyword argument (`example `__). Contributing your changes to *pandas* ===================================== @@ -806,8 +806,8 @@ like:: origin git@github.com:yourname/pandas.git (fetch) origin git@github.com:yourname/pandas.git (push) - upstream git://github.com/pydata/pandas.git (fetch) - upstream git://github.com/pydata/pandas.git (push) + upstream git://github.com/pandas-dev/pandas.git (fetch) + upstream git://github.com/pandas-dev/pandas.git (push) Now your code is on GitHub, but it is not yet a part of the *pandas* project. For that to happen, a pull request needs to be submitted on GitHub. diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 38a816060e1bc..3e84d15caf50b 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -200,7 +200,7 @@ The :ref:`indexing ` docs. df[(df.AAA <= 6) & (df.index.isin([0,2,4]))] `Use loc for label-oriented slicing and iloc positional slicing -`__ +`__ .. ipython:: python @@ -410,7 +410,7 @@ Sorting df.sort_values(by=('Labs', 'II'), ascending=False) `Partial Selection, the need for sortedness; -`__ +`__ Levels ****** @@ -787,7 +787,7 @@ The :ref:`Resample ` docs. `__ `Using TimeGrouper and another grouping to create subgroups, then apply a custom function -`__ +`__ `Resampling with custom periods `__ @@ -823,7 +823,7 @@ ignore_index is needed in pandas < v0.13, and depending on df construction df = df1.append(df2,ignore_index=True); df `Self Join of a DataFrame -`__ +`__ .. ipython:: python @@ -877,7 +877,7 @@ The :ref:`Plotting ` docs. `__ `Generate Embedded plots in excel files using Pandas, Vincent and xlsxwriter -`__ +`__ `Boxplot for each quartile of a stratifying variable `__ @@ -936,7 +936,7 @@ using that handle to read. `__ `Dealing with bad lines -`__ +`__ `Dealing with bad lines II `__ @@ -1075,7 +1075,7 @@ The :ref:`HDFStores ` docs `__ `Managing heterogeneous data using a linked multiple table hierarchy -`__ +`__ `Merging on-disk tables with millions of rows `__ @@ -1216,7 +1216,7 @@ Timedeltas The :ref:`Timedeltas ` docs. `Using timedeltas -`__ +`__ .. ipython:: python diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 17ebd1f163f4f..a37b1e89c7cc3 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -143,9 +143,9 @@ both "column wise min/max and global min/max coloring." API ----- -`pandas-datareader `__ +`pandas-datareader `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``pandas-datareader`` is a remote data access library for pandas. ``pandas.io`` from pandas < 0.17.0 is now refactored/split-off to and importable from ``pandas_datareader`` (PyPI:``pandas-datareader``). Many/most of the supported APIs have at least a documentation paragraph in the `pandas-datareader docs `_: +``pandas-datareader`` is a remote data access library for pandas. ``pandas.io`` from pandas < 0.17.0 is now refactored/split-off to and importable from ``pandas_datareader`` (PyPI:``pandas-datareader``). Many/most of the supported APIs have at least a documentation paragraph in the `pandas-datareader docs `_: The following data feeds are available: @@ -170,7 +170,7 @@ PyDatastream is a Python interface to the SOAP API to return indexed Pandas DataFrames or Panels with financial data. 
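To make the ``pandas.io`` split-off above concrete, the import change is spelled out verbatim in the ``ImportError`` messages later in this diff (``pandas/io/data.py``, ``pandas/io/wb.py``); a minimal sketch:

```python
# Removed since pandas 0.17 (importing it now raises ImportError):
# from pandas.io import data, wb

# With the separate package installed (``pip install pandas-datareader``):
from pandas_datareader import data, wb

# Usage is unchanged from the old pandas.io.data API, e.g.:
# f = data.DataReader('F', 'yahoo', '2014-01-01', '2014-12-31')
```
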
This package requires valid credentials for this API (non free). -`pandaSDMX `__ +`pandaSDMX `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pandaSDMX is an extensible library to retrieve and acquire statistical data and metadata disseminated in @@ -215,7 +215,7 @@ dimensional arrays, rather than the tabular data for which pandas excels. Out-of-core ------------- -`Dask `__ +`Dask `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Dask is a flexible parallel computing library for analytics. Dask diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 99d7486cde2d0..cfac5c257184d 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -391,7 +391,7 @@ This is because ``reindex_like`` silently inserts ``NaNs`` and the ``dtype`` changes accordingly. This can cause some issues when using ``numpy`` ``ufuncs`` such as ``numpy.logical_and``. -See the `this old issue `__ for a more +See the `this old issue `__ for a more detailed discussion. Parsing Dates from Text Files diff --git a/doc/source/install.rst b/doc/source/install.rst index 6295e6f6cbb68..923c22aa9048f 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -13,7 +13,7 @@ This is the recommended installation method for most users. Instructions for installing from source, `PyPI `__, various Linux distributions, or a -`development version `__ are also provided. +`development version `__ are also provided. Python version support ---------------------- @@ -189,7 +189,7 @@ pandas is equipped with an exhaustive set of unit tests covering about 97% of the codebase as of this writing. To run it on your machine to verify that everything is working (and you have all of the dependencies, soft and hard, installed), make sure you have `nose -`__ and run: +`__ and run: :: diff --git a/doc/source/io.rst b/doc/source/io.rst index c07cfe4cd5574..ae71587c8b46b 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2035,7 +2035,7 @@ You can even pass in an instance of ``StringIO`` if you so desire that having so many network-accessing functions slows down the documentation build. If you spot an error or an example that doesn't run, please do not hesitate to report it over on `pandas GitHub issues page - `__. + `__. Read a URL and match a table that contains specific text @@ -2639,8 +2639,8 @@ config options ` ``io.excel.xlsx.writer`` and ``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx`` files if `Xlsxwriter`_ is not available. -.. _XlsxWriter: http://xlsxwriter.readthedocs.org -.. _openpyxl: http://openpyxl.readthedocs.org/ +.. _XlsxWriter: https://xlsxwriter.readthedocs.io +.. _openpyxl: https://openpyxl.readthedocs.io/ .. _xlwt: http://www.python-excel.org To specify which writer you want to use, you can pass an engine keyword diff --git a/doc/source/overview.rst b/doc/source/overview.rst index b1addddc2121d..92caeec319169 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -81,7 +81,7 @@ Getting Support --------------- The first stop for pandas issues and ideas is the `Github Issue Tracker -`__. If you have a general question, +`__. If you have a general question, pandas community experts can answer through `Stack Overflow `__. @@ -103,7 +103,7 @@ training, and consulting for pandas. pandas is only made possible by a group of people around the world like you who have contributed new code, bug reports, fixes, comments and ideas. A -complete list can be found `on Github `__. +complete list can be found `on Github `__. 
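Looping back to the Excel writer engines covered in ``io.rst`` above, a short, hedged illustration of engine selection (filenames are arbitrary; the chosen engine must be installed):

```python
import pandas as pd

df = pd.DataFrame({'a': [1, 2]})

# Choose the writer explicitly for one call...
df.to_excel('out_xlsxwriter.xlsx', engine='xlsxwriter')

# ...or change the default via the config option mentioned above.
pd.set_option('io.excel.xlsx.writer', 'openpyxl')
df.to_excel('out_openpyxl.xlsx')
```
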
Development Team ---------------- diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index f3df1ebdf25cb..f2a8668bbda91 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -17,7 +17,7 @@ rpy2 / R interface In v0.16.0, the ``pandas.rpy`` interface has been **deprecated and will be removed in a future version**. Similar functionality can be accessed - through the `rpy2 `__ project. + through the `rpy2 `__ project. See the :ref:`updating ` section for a guide to port your code from the ``pandas.rpy`` to ``rpy2`` functions. @@ -71,7 +71,7 @@ The ``convert_to_r_matrix`` function can be replaced by the normal Not all conversion functions in rpy2 are working exactly the same as the current methods in pandas. If you experience problems or limitations in comparison to the ones in pandas, please report this at the - `issue tracker `_. + `issue tracker `_. See also the documentation of the `rpy2 `__ project. diff --git a/doc/source/release.rst b/doc/source/release.rst index 7e987fcff31b3..d210065f04459 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -20,7 +20,7 @@ Release Notes ************* This is the list of changes to pandas between each release. For full details, -see the commit logs at http://github.com/pydata/pandas +see the commit logs at http://github.com/pandas-dev/pandas **What is it** @@ -33,7 +33,7 @@ analysis / manipulation tool available in any language. **Where to get it** -* Source code: http://github.com/pydata/pandas +* Source code: http://github.com/pandas-dev/pandas * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst index 019aa82fed1aa..e2c713ac8519a 100644 --- a/doc/source/remote_data.rst +++ b/doc/source/remote_data.rst @@ -13,7 +13,7 @@ DataReader The sub-package ``pandas.io.data`` is removed in favor of a separately installable `pandas-datareader package -`_. This will allow the data +`_. This will allow the data modules to be independently updated to your pandas installation. The API for ``pandas-datareader v0.1.1`` is the same as in ``pandas v0.16.1``. (:issue:`8961`) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index e92798ea17448..c25e734a046b2 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -138,7 +138,7 @@ Modern Pandas Excel charts with pandas, vincent and xlsxwriter ------------------------------------------------ -- `Using Pandas and XlsxWriter to create Excel charts `_ +- `Using Pandas and XlsxWriter to create Excel charts `_ Various Tutorials ----------------- diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 6e05c3ff0457a..e3b186abe53fc 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -892,7 +892,7 @@ for Fourier series. By coloring these curves differently for each class it is possible to visualize data clustering. Curves belonging to samples of the same class will usually be closer together and form larger structures. -**Note**: The "Iris" dataset is available `here `__. +**Note**: The "Iris" dataset is available `here `__. .. ipython:: python @@ -1044,7 +1044,7 @@ forces acting on our sample are at an equilibrium) is where a dot representing our sample will be drawn. Depending on which class that sample belongs it will be colored differently. -**Note**: The "Iris" dataset is available `here `__. 
+**Note**: The "Iris" dataset is available `here `__. .. ipython:: python diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt index a91e0ab9e4961..181cd401c85d6 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.txt @@ -401,7 +401,7 @@ through SQLAlchemy (:issue:`2717`, :issue:`4163`, :issue:`5950`, :issue:`6292`). All databases supported by SQLAlchemy can be used, such as PostgreSQL, MySQL, Oracle, Microsoft SQL server (see documentation of SQLAlchemy on `included dialects -`_). +`_). The functionality of providing DBAPI connection objects will only be supported for sqlite3 in the future. The ``'mysql'`` flavor is deprecated. diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index fc13224d3fe6e..9cb299593076d 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -141,7 +141,7 @@ as well as the ``.sum()`` operation. Releasing of the GIL could benefit an application that uses threads for user interactions (e.g. QT_), or performing multi-threaded computations. A nice example of a library that can handle these types of computation-in-parallel is the dask_ library. -.. _dask: https://dask.readthedocs.org/en/latest/ +.. _dask: https://dask.readthedocs.io/en/latest/ .. _QT: https://wiki.python.org/moin/PyQt .. _whatsnew_0170.plot: diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 3edb8c1fa9071..5180b9a092f6c 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -20,8 +20,8 @@ Highlights include: Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - - +- Fixed performance regression in factorization of ``Period`` data (:issue:`14338`) +- Improved performance in ``.to_json()`` when ``lines=True`` (:issue:`14408`) @@ -34,7 +34,7 @@ Bug Fixes - +- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) @@ -44,4 +44,7 @@ Bug Fixes - Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) +- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`) - Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) +- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) +- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0354a8046e873..7fa9991138fba 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -62,6 +62,7 @@ Deprecations Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 990018f2f7f3b..1b8930dcae0f1 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -392,7 +392,7 @@ def __reduce__(self): # optional, for pickle support return type(self), args, None, None, list(self.items()) -# https://github.com/pydata/pandas/pull/9123 +# https://github.com/pandas-dev/pandas/pull/9123 def is_platform_little_endian(): """ am I little endian """ return sys.byteorder == 'little' diff --git
a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 72fbc3906cafb..f480eae2dd04d 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1693,11 +1693,11 @@ def test_result_types(self): self.check_result_type(np.float64, np.float64) def test_result_types2(self): - # xref https://github.com/pydata/pandas/issues/12293 + # xref https://github.com/pandas-dev/pandas/issues/12293 raise nose.SkipTest("unreliable tests on complex128") # Did not test complex64 because DataFrame is converting it to - # complex128. Due to https://github.com/pydata/pandas/issues/10952 + # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 self.check_result_type(np.complex128, np.complex128) def test_undefined_func(self): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ee59d6552bb2f..8644d4568e44d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -285,18 +285,27 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ - from pandas import Index, Series, DatetimeIndex - - vals = np.asarray(values) - - # localize to UTC - is_datetimetz_type = is_datetimetz(values) - if is_datetimetz_type: - values = DatetimeIndex(values) - vals = values.asi8 + from pandas import Index, Series, DatetimeIndex, PeriodIndex + + # handling two possibilities here + # - for a numpy datetimelike simply view as i8 then cast back + # - for an extension datetimelike view as i8 then + # reconstruct from boxed values to transfer metadata + dtype = None + if needs_i8_conversion(values): + if is_period_dtype(values): + values = PeriodIndex(values) + vals = values.asi8 + elif is_datetimetz(values): + values = DatetimeIndex(values) + vals = values.asi8 + else: + # numpy dtype + dtype = values.dtype + vals = values.view(np.int64) + else: + vals = np.asarray(values) - is_datetime = is_datetime64_dtype(vals) - is_timedelta = is_timedelta64_dtype(vals) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) @@ -311,13 +320,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) - if is_datetimetz_type: - # reset tz - uniques = values._shallow_copy(uniques) - elif is_datetime: - uniques = uniques.astype('M8[ns]') - elif is_timedelta: - uniques = uniques.astype('m8[ns]') + if dtype is not None: + uniques = uniques.astype(dtype) + if isinstance(values, Index): uniques = values._shallow_copy(uniques, name=None) elif isinstance(values, Series): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index db48f2a46eaf3..9efaff6060909 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1681,7 +1681,7 @@ def __setitem__(self, key, value): else: # There is a bug in numpy, which does not accept a Series as a # indexer - # https://github.com/pydata/pandas/issues/6168 + # https://github.com/pandas-dev/pandas/issues/6168 # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9 # FIXME: remove when numpy 1.9 is the lowest numpy version pandas # accepts... 
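Before moving on: the ``factorize`` refactor above is easier to follow with a concrete picture of the behaviour it targets. Roughly (a sketch, not pandas test code; exact return types vary by input):

```python
import numpy as np
import pandas as pd

# numpy datetime64 input: factorized through an int64 view, and the
# uniques are cast back, so the M8[ns] dtype survives the round trip
vals = np.array(['2016-01-01', '2016-01-02', '2016-01-01'], dtype='M8[ns]')
labels, uniques = pd.factorize(vals)
print(labels)         # [0 1 0]
print(uniques.dtype)  # datetime64[ns]

# extension datetimelikes (Period, tz-aware datetimes) are boxed first,
# so metadata such as freq or tz is carried over onto the uniques
pidx = pd.PeriodIndex(['2016-01', '2016-02', '2016-01'], freq='M')
labels, uniques = pd.factorize(pidx)
print(uniques)        # a PeriodIndex ['2016-01', '2016-02'] with freq='M'
```
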
@@ -1690,7 +1690,7 @@ def __setitem__(self, key, value): lindexer = self.categories.get_indexer(rvalue) # FIXME: the following can be removed after GH7820 is fixed: - # https://github.com/pydata/pandas/issues/7820 + # https://github.com/pandas-dev/pandas/issues/7820 # float categories do currently return -1 for np.nan, even if np.nan is # included in the index -> "repair" this here if isnull(rvalue).any() and isnull(self.categories).any(): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3c376e3188eac..5223c0ac270f3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2201,36 +2201,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, raise AssertionError('Level %s not in index' % str(level)) level = index.names.index(level) - inds = index.labels[level] - level_index = index.levels[level] - if self.name is None: self.name = index.names[level] - # XXX complete hack - - if grouper is not None: - level_values = index.levels[level].take(inds) - self.grouper = level_values.map(self.grouper) - else: - # all levels may not be observed - labels, uniques = algos.factorize(inds, sort=True) - - if len(uniques) > 0 and uniques[0] == -1: - # handle NAs - mask = inds != -1 - ok_labels, uniques = algos.factorize(inds[mask], sort=True) - - labels = np.empty(len(inds), dtype=inds.dtype) - labels[mask] = ok_labels - labels[~mask] = -1 - - if len(uniques) < len(level_index): - level_index = level_index.take(uniques) + self.grouper, self._labels, self._group_index = \ index._get_grouper_for_level(self.grouper, level) - self._labels = labels - self._group_index = level_index - self.grouper = level_index.take(labels) else: if isinstance(self.grouper, (list, tuple)): self.grouper = com._asarray_tuplesafe(self.grouper) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 564586eec5a8e..d7d68ad536be5 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,6 +11,7 @@ import pandas.hashtable as _hash from pandas import compat, lib, algos, tslib +from pandas.compat.numpy import _np_version_under1p10 from pandas.types.common import (_ensure_int64, _ensure_object, _ensure_float64, _get_dtype, is_float, is_scalar, @@ -829,9 +830,37 @@ def _checked_add_with_arr(arr, b): Raises ------ - OverflowError if any x + y exceeds the maximum int64 value. + OverflowError if any x + y exceeds the maximum or minimum int64 value. """ - if (np.iinfo(np.int64).max - b < arr).any(): - raise OverflowError("Python int too large to " - "convert to C long") + # For performance reasons, we broadcast 'b' to the new array 'b2' + # so that it has the same size as 'arr'. + if _np_version_under1p10: + if lib.isscalar(b): + b2 = np.empty(arr.shape) + b2.fill(b) + else: + b2 = b + else: + b2 = np.broadcast_to(b, arr.shape) + + # gh-14324: For each element in 'arr' and its corresponding element + # in 'b2', we check the sign of the element in 'b2'. If it is positive, + # we then check whether its sum with the element in 'arr' exceeds + # np.iinfo(np.int64).max. If so, we have an overflow error. If it + # is negative, we then check whether its sum with the element in + # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow + # error as well.
+ mask1 = b2 > 0 + mask2 = b2 < 0 + + if not mask1.any(): + to_raise = (np.iinfo(np.int64).min - b2 > arr).any() + elif not mask2.any(): + to_raise = (np.iinfo(np.int64).max - b2 < arr).any() + else: + to_raise = ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]).any() or + (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]).any()) + + if to_raise: + raise OverflowError("Overflow in int64 addition") return arr + b diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5082fc84982c6..1c24a0db34b2b 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -432,6 +432,36 @@ def _update_inplace(self, result, **kwargs): # guard when called from IndexOpsMixin raise TypeError("Index can't be updated inplace") + _index_shared_docs['_get_grouper_for_level'] = """ + Get index grouper corresponding to an index level + + Parameters + ---------- + mapper: Group mapping function or None + Function mapping index values to groups + level : int or None + Index level + + Returns + ------- + grouper : Index + Index of values to group on + labels : ndarray of int or None + Array of locations in level_index + uniques : Index or None + Index of unique values for level + """ + + @Appender(_index_shared_docs['_get_grouper_for_level']) + def _get_grouper_for_level(self, mapper, level=None): + assert level is None or level == 0 + if mapper is None: + grouper = self + else: + grouper = self.map(mapper) + + return grouper, None, None + def is_(self, other): """ More flexible, faster check like ``is`` but that works through views diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 0c465da24a17e..a9f452db69659 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -539,6 +539,37 @@ def _format_native_types(self, na_rep='nan', **kwargs): return mi.values + @Appender(_index_shared_docs['_get_grouper_for_level']) + def _get_grouper_for_level(self, mapper, level): + indexer = self.labels[level] + level_index = self.levels[level] + + if mapper is not None: + # Handle group mapping function and return + level_values = self.levels[level].take(indexer) + grouper = level_values.map(mapper) + return grouper, None, None + + labels, uniques = algos.factorize(indexer, sort=True) + + if len(uniques) > 0 and uniques[0] == -1: + # Handle NAs + mask = indexer != -1 + ok_labels, uniques = algos.factorize(indexer[mask], + sort=True) + + labels = np.empty(len(indexer), dtype=indexer.dtype) + labels[mask] = ok_labels + labels[~mask] = -1 + + if len(uniques) < len(level_index): + # Remove unobserved levels from level_index + level_index = level_index.take(uniques) + + grouper = level_index.take(labels) + + return grouper, labels, level_index + @property def _constructor(self): return MultiIndex.from_tuples diff --git a/pandas/io/data.py b/pandas/io/data.py index e76790a6ab98b..09c7aef0cde1a 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -1,6 +1,6 @@ raise ImportError( "The pandas.io.data module is moved to a separate package " "(pandas-datareader). 
After installing the pandas-datareader package " - "(https://github.com/pydata/pandas-datareader), you can change " + "(https://github.com/pandas-dev/pandas-datareader), you can change " "the import ``from pandas.io import data, wb`` to " "``from pandas_datareader import data, wb``.") diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index d6f8660f20ef6..8038cc500f6cd 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -236,7 +236,7 @@ def get_user_account_credentials(self): return credentials def get_service_account_credentials(self): - # Bug fix for https://github.com/pydata/pandas/issues/12572 + # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 # We need to know that a supported version of oauth2client is installed # Test that either of the following is installed: # - SignedJwtAssertionCredentials from oauth2client.client diff --git a/pandas/io/json.py b/pandas/io/json.py index e697351484f68..1e258101a5d86 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -605,20 +605,9 @@ def _convert_to_line_delimits(s): if not s[0] == '[' and s[-1] == ']': return s s = s[1:-1] - num_open_brackets_seen = 0 - commas_to_replace = [] - for idx, char in enumerate(s): # iter through to find all - if char == ',': # commas that should be \n - if num_open_brackets_seen == 0: - commas_to_replace.append(idx) - elif char == '{': - num_open_brackets_seen += 1 - elif char == '}': - num_open_brackets_seen -= 1 - s_arr = np.array(list(s)) # Turn to an array to set - s_arr[commas_to_replace] = '\n' # all commas at once. - s = ''.join(s_arr) - return s + + from pandas.lib import convert_json_to_lines + return convert_json_to_lines(s) def nested_to_record(ds, prefix="", level=0): diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index 47bdd25572fc7..117ac2324d0e0 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -767,7 +767,7 @@ def test_round_trip_exception_(self): @network def test_url(self): - url = 'https://api.github.com/repos/pydata/pandas/issues?per_page=5' + url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5' # noqa result = read_json(url, convert_dates=True) for c in ['created_at', 'closed_at', 'updated_at']: self.assertEqual(result[c].dtype, 'datetime64[ns]') @@ -962,6 +962,12 @@ def test_to_jsonl(self): expected = '{"a":1,"b":2}\n{"a":1,"b":2}' self.assertEqual(result, expected) + df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b']) + result = df.to_json(orient="records", lines=True) + expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + self.assertEqual(result, expected) + assert_frame_equal(pd.read_json(result, lines=True), df) + def test_latin_encoding(self): if compat.PY2: self.assertRaisesRegexp( diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 0b59b695e1dca..0219e16391be8 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -629,7 +629,7 @@ def test_read_csv_parse_simple_list(self): @tm.network def test_url(self): # HTTP(S) - url = ('https://raw.github.com/pydata/pandas/master/' + url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/io/tests/parser/data/salary.table.csv') url_table = self.read_table(url) dirpath = tm.get_data_path() diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index 8b8a6de36fc03..7e2f039853e2f 100644 --- a/pandas/io/tests/parser/test_network.py +++ 
b/pandas/io/tests/parser/test_network.py @@ -23,7 +23,7 @@ def setUp(self): @tm.network def test_url_gz(self): - url = ('https://raw.github.com/pydata/pandas/' + url = ('https://raw.github.com/pandas-dev/pandas/' 'master/pandas/io/tests/parser/data/salary.table.gz') url_table = read_table(url, compression="gzip", engine="python") tm.assert_frame_equal(url_table, self.local_table) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index d163b05aa01d4..998e71076b7c0 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -543,7 +543,7 @@ def test_read_xlrd_Book(self): @tm.network def test_read_from_http_url(self): - url = ('https://raw.github.com/pydata/pandas/master/' + url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/io/tests/data/test1' + self.ext) url_table = read_excel(url) local_table = self.get_exceldf('test1') diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 0ea4b5204e150..cca1580b84195 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -150,7 +150,7 @@ def _test_imports(): raise ImportError( "pandas requires httplib2 for Google BigQuery support") - # Bug fix for https://github.com/pydata/pandas/issues/12572 + # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 # We need to know that a supported version of oauth2client is installed # Test that either of the following is installed: # - SignedJwtAssertionCredentials from oauth2client.client @@ -651,7 +651,7 @@ def test_download_dataset_larger_than_200k_rows(self): self.assertEqual(len(df.drop_duplicates()), test_size) def test_zero_rows(self): - # Bug fix for https://github.com/pydata/pandas/issues/10273 + # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 df = gbq.read_gbq("SELECT title, id " "FROM [publicdata:samples.wikipedia] " "WHERE timestamp=-9999999", diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index cf61ad9a35935..91042775ba19d 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -544,7 +544,7 @@ def test_sparse_frame(self): class TestCompression(TestPackers): - """See https://github.com/pydata/pandas/pull/9783 + """See https://github.com/pandas-dev/pandas/pull/9783 """ def setUp(self): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 198a4017b5af7..af8989baabbc0 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1610,7 +1610,7 @@ def test_double_precision(self): def test_connectable_issue_example(self): # This tests the example raised in issue - # https://github.com/pydata/pandas/issues/10104 + # https://github.com/pandas-dev/pandas/issues/10104 def foo(connection): query = 'SELECT test_foo_data FROM test_foo_data' diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 5dc4d9ce1adc4..2183290c7e074 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,6 +1,6 @@ raise ImportError( "The pandas.io.wb module is moved to a separate package " "(pandas-datareader). 
After installing the pandas-datareader package " - "(https://github.com/pydata/pandas-datareader), you can change " + "(https://github.com/pandas-dev/pandas-datareader), you can change " "the import ``from pandas.io import data, wb`` to " "``from pandas_datareader import data, wb``.") diff --git a/pandas/lib.pyx b/pandas/lib.pyx index e7672de5c835e..b56a02b245d69 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -1087,6 +1087,44 @@ def string_array_replace_from_nan_rep( return arr +@cython.boundscheck(False) +@cython.wraparound(False) +def convert_json_to_lines(object arr): + """ + replace comma separated json with line feeds, paying special attention + to quotes & brackets + """ + cdef: + Py_ssize_t i = 0, num_open_brackets_seen = 0, in_quotes = 0, length + ndarray[uint8_t] narr + unsigned char v, comma, left_bracket, right_bracket, newline, quote, backslash + + newline = ord('\n') + comma = ord(',') + left_bracket = ord('{') + right_bracket = ord('}') + quote = ord('"') + backslash = ord('\\') + + narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + length = narr.shape[0] + for i in range(length): + v = narr[i] + if v == quote and i > 0 and narr[i - 1] != backslash: + in_quotes = ~in_quotes + if v == comma: # commas that should be \n + if num_open_brackets_seen == 0 and not in_quotes: + narr[i] = newline + elif v == left_bracket: + if not in_quotes: + num_open_brackets_seen += 1 + elif v == right_bracket: + if not in_quotes: + num_open_brackets_seen -= 1 + + return narr.tostring().decode('utf-8') + + @cython.boundscheck(False) @cython.wraparound(False) def write_csv_rows(list data, ndarray data_index, diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index 3083750e582fc..2fec04b9c1aa3 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -144,7 +144,7 @@ def test_set_properties_subset(self): self.assertEqual(result, expected) def test_empty_index_name_doesnt_display(self): - # https://github.com/pydata/pandas/pull/12090#issuecomment-180695902 + # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]}) result = df.style._translate() @@ -175,7 +175,7 @@ def test_empty_index_name_doesnt_display(self): self.assertEqual(result['head'], expected) def test_index_name(self): - # https://github.com/pydata/pandas/issues/11655 + # https://github.com/pandas-dev/pandas/issues/11655 df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]}) result = df.set_index('A').style._translate() @@ -195,7 +195,7 @@ def test_index_name(self): self.assertEqual(result['head'], expected) def test_multiindex_name(self): - # https://github.com/pydata/pandas/issues/11655 + # https://github.com/pandas-dev/pandas/issues/11655 df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]}) result = df.set_index(['A', 'B']).style._translate() @@ -217,7 +217,7 @@ def test_multiindex_name(self): self.assertEqual(result['head'], expected) def test_numeric_columns(self): - # https://github.com/pydata/pandas/issues/12125 + # https://github.com/pandas-dev/pandas/issues/12125 # smoke test for _translate df = pd.DataFrame({0: [1, 2, 3]}) df.style._translate() diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index b7cd8a1c01224..81aa694577fb5 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@
-347,6 +347,65 @@ def test_concat_named_keys(self): names=[None, None])) assert_frame_equal(concatted_unnamed, expected_unnamed) + def test_concat_axis_parameter(self): + # GH 14369 + df1 = pd.DataFrame({'A': [0.1, 0.2]}, index=range(2)) + df2 = pd.DataFrame({'A': [0.3, 0.4]}, index=range(2)) + + # Index/row/0 DataFrame + expected_index = pd.DataFrame( + {'A': [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) + + concatted_index = pd.concat([df1, df2], axis='index') + assert_frame_equal(concatted_index, expected_index) + + concatted_row = pd.concat([df1, df2], axis='rows') + assert_frame_equal(concatted_row, expected_index) + + concatted_0 = pd.concat([df1, df2], axis=0) + assert_frame_equal(concatted_0, expected_index) + + # Columns/1 DataFrame + expected_columns = pd.DataFrame( + [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=['A', 'A']) + + concatted_columns = pd.concat([df1, df2], axis='columns') + assert_frame_equal(concatted_columns, expected_columns) + + concatted_1 = pd.concat([df1, df2], axis=1) + assert_frame_equal(concatted_1, expected_columns) + + series1 = pd.Series([0.1, 0.2]) + series2 = pd.Series([0.3, 0.4]) + + # Index/row/0 Series + expected_index_series = pd.Series( + [0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1]) + + concatted_index_series = pd.concat([series1, series2], axis='index') + assert_series_equal(concatted_index_series, expected_index_series) + + concatted_row_series = pd.concat([series1, series2], axis='rows') + assert_series_equal(concatted_row_series, expected_index_series) + + concatted_0_series = pd.concat([series1, series2], axis=0) + assert_series_equal(concatted_0_series, expected_index_series) + + # Columns/1 Series + expected_columns_series = pd.DataFrame( + [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1]) + + concatted_columns_series = pd.concat( + [series1, series2], axis='columns') + assert_frame_equal(concatted_columns_series, expected_columns_series) + + concatted_1_series = pd.concat([series1, series2], axis=1) + assert_frame_equal(concatted_1_series, expected_columns_series) + + # Testing ValueError + with assertRaisesRegexp(ValueError, 'No axis named'): + pd.concat([series1, series2], axis='something') + class TestDataFrameCombineFirst(tm.TestCase, TestData): diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 2cb62a60f885b..9ef2802cb950f 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -392,7 +392,7 @@ def test_boolean_selection(self): def test_indexing_with_category(self): - # https://github.com/pydata/pandas/issues/12564 + # https://github.com/pandas-dev/pandas/issues/12564 # consistent result if comparing as Dataframe cat = DataFrame({'A': ['foo', 'bar', 'baz']}) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 333792c5ffdb2..0916693ade2ce 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -100,7 +100,7 @@ def test_boxplot_return_type_none(self): @slow def test_boxplot_return_type_legacy(self): - # API change in https://github.com/pydata/pandas/pull/7096 + # API change in https://github.com/pandas-dev/pandas/pull/7096 import matplotlib as mpl # noqa df = DataFrame(randn(6, 4), diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 4d0c1e9213b17..87cf89ebf0a9d 100644 --- a/pandas/tests/plotting/test_frame.py +++ 
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 2cb62a60f885b..9ef2802cb950f 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -392,7 +392,7 @@ def test_boolean_selection(self):
 
     def test_indexing_with_category(self):
 
-        # https://github.com/pydata/pandas/issues/12564
+        # https://github.com/pandas-dev/pandas/issues/12564
         # consistent result if comparing as Dataframe
 
         cat = DataFrame({'A': ['foo', 'bar', 'baz']})
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
index 333792c5ffdb2..0916693ade2ce 100644
--- a/pandas/tests/plotting/test_boxplot_method.py
+++ b/pandas/tests/plotting/test_boxplot_method.py
@@ -100,7 +100,7 @@ def test_boxplot_return_type_none(self):
 
     @slow
     def test_boxplot_return_type_legacy(self):
-        # API change in https://github.com/pydata/pandas/pull/7096
+        # API change in https://github.com/pandas-dev/pandas/pull/7096
         import matplotlib as mpl  # noqa
 
         df = DataFrame(randn(6, 4),
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 4d0c1e9213b17..87cf89ebf0a9d 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -84,7 +84,7 @@ def test_plot(self):
         # We have to redo it here because _check_plot_works does two plots,
         # once without an ax kwarg and once with an ax kwarg and the new sharex
         # behaviour does not remove the visibility of the latter axis (as ax is
-        # present). see: https://github.com/pydata/pandas/issues/9737
+        # present). see: https://github.com/pandas-dev/pandas/issues/9737
 
         axes = df.plot(subplots=True, title='blah')
         self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
@@ -927,7 +927,7 @@ def test_plot_scatter_with_c(self):
 
         # Ensure that we can pass an np.array straight through to matplotlib,
         # this functionality was accidentally removed previously.
-        # See https://github.com/pydata/pandas/issues/8852 for bug report
+        # See https://github.com/pandas-dev/pandas/issues/8852 for bug report
         #
         # Exercise colormap path and non-colormap path as they are independent
         #
@@ -2115,7 +2115,7 @@ def test_pie_df_nan(self):
             self.assertEqual(result, expected)  # legend labels
             # NaN's not included in legend with subplots
-            # see https://github.com/pydata/pandas/issues/8390
+            # see https://github.com/pandas-dev/pandas/issues/8390
             self.assertEqual([x.get_text() for x in
                               ax.get_legend().get_texts()],
                              base_expected[:i] + base_expected[i + 1:])
@@ -2336,9 +2336,9 @@ def _check_errorbar_color(containers, expected, has_err='has_xerr'):
 
     @slow
     def test_sharex_and_ax(self):
-        # https://github.com/pydata/pandas/issues/9737 using gridspec, the axis
-        # in fig.get_axis() are sorted differently than pandas expected them,
-        # so make sure that only the right ones are removed
+        # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
+        # the axis in fig.get_axis() are sorted differently than pandas
+        # expected them, so make sure that only the right ones are removed
         import matplotlib.pyplot as plt
         plt.close('all')
         gs, axes = _generate_4_axes_via_gridspec()
@@ -2388,9 +2388,9 @@ def _check(axes):
 
     @slow
     def test_sharey_and_ax(self):
-        # https://github.com/pydata/pandas/issues/9737 using gridspec, the axis
-        # in fig.get_axis() are sorted differently than pandas expected them,
-        # so make sure that only the right ones are removed
+        # https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
+        # the axis in fig.get_axis() are sorted differently than pandas
+        # expected them, so make sure that only the right ones are removed
         import matplotlib.pyplot as plt
 
         gs, axes = _generate_4_axes_via_gridspec()
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index 8f2ab0ed28839..ed441f2f85572 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -273,7 +273,7 @@ def f():
         self.assertRaises(com.SettingWithCopyError, f)
 
     def test_dt_accessor_no_new_attributes(self):
-        # https://github.com/pydata/pandas/issues/10673
+        # https://github.com/pandas-dev/pandas/issues/10673
         s = Series(date_range('20130101', periods=5, freq='D'))
         with tm.assertRaisesRegexp(AttributeError,
                                    "You cannot add any new attribute"):
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index f688ec2d43789..086946d05d7a6 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -1412,7 +1412,7 @@ def tester(a, b):
 
         # NotImplemented
         # this is an alignment issue; these are equivalent
-        # https://github.com/pydata/pandas/issues/5284
+        # https://github.com/pandas-dev/pandas/issues/5284
 
         self.assertRaises(ValueError, lambda: d.__and__(s, axis='columns'))
         self.assertRaises(ValueError, tester, s, d)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 092e02ee261a0..f89f41abd0d35 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -595,7 +595,7 @@ def test_categorical_zeroes(self):
         tm.assert_series_equal(result, expected, check_index_type=True)
 
     def test_dropna(self):
-        # https://github.com/pydata/pandas/issues/9443#issuecomment-73719328
+        # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328
 
         tm.assert_series_equal(
             pd.Series([True, True, False]).value_counts(dropna=True),
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index a494a0d53b123..f01fff035a3c5 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -191,7 +191,7 @@ def f():
         cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
         self.assertTrue(is_integer_dtype(cat.categories))
 
-        # https://github.com/pydata/pandas/issues/3678
+        # https://github.com/pandas-dev/pandas/issues/3678
         cat = pd.Categorical([np.nan, 1, 2, 3])
         self.assertTrue(is_integer_dtype(cat.categories))
 
@@ -618,7 +618,7 @@ def test_describe(self):
                              index=exp_index)
         tm.assert_frame_equal(desc, expected)
 
-        # https://github.com/pydata/pandas/issues/3678
+        # https://github.com/pandas-dev/pandas/issues/3678
         # describe should work with NaN
         cat = pd.Categorical([np.nan, 1, 2, 2])
         desc = cat.describe()
@@ -1547,7 +1547,7 @@ def test_memory_usage(self):
         self.assertTrue(abs(diff) < 100)
 
     def test_searchsorted(self):
-        # https://github.com/pydata/pandas/issues/8420
+        # https://github.com/pandas-dev/pandas/issues/8420
         s1 = pd.Series(['apple', 'bread', 'bread', 'cheese', 'milk'])
         s2 = pd.Series(['apple', 'bread', 'bread', 'cheese', 'milk', 'donuts'])
         c1 = pd.Categorical(s1, ordered=True)
@@ -1633,7 +1633,7 @@ def test_reflected_comparison_with_scalars(self):
                                     np.array([False, True, True]))
 
     def test_comparison_with_unknown_scalars(self):
-        # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057
+        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
         # and following comparisons with scalars not in categories should raise
         # for unequal comps, but not for equal/not equal
         cat = pd.Categorical([1, 2, 3], ordered=True)
@@ -3829,7 +3829,7 @@ def f():
 
         self.assertRaises(TypeError, f)
 
-        # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057
+        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
         # and following comparisons with scalars not in categories should raise
         # for unequal comps, but not for equal/not equal
         cat = Series(Categorical(list("abc"), ordered=True))
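The rule these two hunks keep restating is easiest to see inline; a small illustrative session (the scalar 4 is chosen here, not taken from the tests):

    import pandas as pd

    cat = pd.Categorical([1, 2, 3], ordered=True)

    cat == 4  # array([False, False, False]) -- equality is always allowed
    cat != 4  # array([ True,  True,  True])
    cat < 4   # raises TypeError, since 4 is not among the categories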
@@ -4303,14 +4303,14 @@ def test_cat_accessor_api(self):
         self.assertFalse(hasattr(invalid, 'cat'))
 
     def test_cat_accessor_no_new_attributes(self):
-        # https://github.com/pydata/pandas/issues/10673
+        # https://github.com/pandas-dev/pandas/issues/10673
         c = Series(list('aabbcde')).astype('category')
         with tm.assertRaisesRegexp(AttributeError,
                                    "You cannot add any new attribute"):
             c.cat.xlabel = "a"
 
     def test_str_accessor_api_for_categorical(self):
-        # https://github.com/pydata/pandas/issues/10661
+        # https://github.com/pandas-dev/pandas/issues/10661
         from pandas.core.strings import StringMethods
         s = Series(list('aabb'))
         s = s + " " + s
@@ -4385,7 +4385,7 @@ def test_str_accessor_api_for_categorical(self):
         self.assertFalse(hasattr(invalid, 'str'))
 
     def test_dt_accessor_api_for_categorical(self):
-        # https://github.com/pydata/pandas/issues/10661
+        # https://github.com/pandas-dev/pandas/issues/10661
         from pandas.tseries.common import Properties
         from pandas.tseries.index import date_range, DatetimeIndex
         from pandas.tseries.period import period_range, PeriodIndex
diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py
index 62ad4c5aa4338..ea226851c9101 100644
--- a/pandas/tests/test_config.py
+++ b/pandas/tests/test_config.py
@@ -427,7 +427,7 @@ def f3(key):
 
     def test_option_context_scope(self):
         # Ensure that creating a context does not affect the existing
         # environment as it is supposed to be used with the `with` statement.
-        # See https://github.com/pydata/pandas/issues/8514
+        # See https://github.com/pandas-dev/pandas/issues/8514
 
         original_value = 60
         context_value = 10
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 01c1d48c6d5c0..f3791ee1d5c91 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -458,6 +458,39 @@ def test_grouper_creation_bug(self):
         expected = s.groupby(level='one').sum()
         assert_series_equal(result, expected)
 
+    def test_grouper_column_and_index(self):
+        # GH 14327
+
+        # Grouping a multi-index frame by a column and an index level should
+        # be equivalent to resetting the index and grouping by two columns
+        idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
+                                         ('b', 1), ('b', 2), ('b', 3)])
+        idx.names = ['outer', 'inner']
+        df_multi = pd.DataFrame({"A": np.arange(6),
+                                 'B': ['one', 'one', 'two',
+                                       'two', 'one', 'one']},
+                                index=idx)
+        result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean()
+        expected = df_multi.reset_index().groupby(['B', 'inner']).mean()
+        assert_frame_equal(result, expected)
+
+        # Test the reverse grouping order
+        result = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean()
+        expected = df_multi.reset_index().groupby(['inner', 'B']).mean()
+        assert_frame_equal(result, expected)
+
+        # Grouping a single-index frame by a column and the index should
+        # be equivalent to resetting the index and grouping by two columns
+        df_single = df_multi.reset_index('outer')
+        result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean()
+        expected = df_single.reset_index().groupby(['B', 'inner']).mean()
+        assert_frame_equal(result, expected)
+
+        # Test the reverse grouping order
+        result = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean()
+        expected = df_single.reset_index().groupby(['inner', 'B']).mean()
+        assert_frame_equal(result, expected)
+
     def test_grouper_getting_correct_binner(self):
 
         # GH 10063
@@ -6443,7 +6476,7 @@ def test_transform_doesnt_clobber_ints(self):
 
     def test_groupby_categorical_two_columns(self):
 
-        # https://github.com/pydata/pandas/issues/8138
+        # https://github.com/pandas-dev/pandas/issues/8138
         d = {'cat': pd.Categorical(["a", "b", "a", "b"],
                                    categories=["a", "b", "c"],
                                    ordered=True),
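The idiom being tested, mixing a plain column name with pd.Grouper(level=...), composes directly in user code; a minimal sketch on the same shape of data (names invented for illustration):

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]],
                                     names=['outer', 'inner'])
    df = pd.DataFrame({'A': np.arange(6),
                       'B': ['one', 'one', 'two', 'two', 'one', 'one']},
                      index=idx)

    # Group by the column 'B' and the index level 'inner' in one call;
    # equivalent to df.reset_index().groupby(['B', 'inner']).mean()
    df.groupby(['B', pd.Grouper(level='inner')]).mean()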
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index f00fdd196abea..be634228b1b6e 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -1004,13 +1004,20 @@ def prng(self):
 
 def test_int64_add_overflow():
     # see gh-14068
-    msg = "too (big|large) to convert"
+    msg = "Overflow in int64 addition"
     m = np.iinfo(np.int64).max
+    n = np.iinfo(np.int64).min
     with tm.assertRaisesRegexp(OverflowError, msg):
         nanops._checked_add_with_arr(np.array([m, m]), m)
     with tm.assertRaisesRegexp(OverflowError, msg):
         nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]))
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([n, n]), n)
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n]))
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([m, n]), np.array([n, n]))
     with tm.assertRaisesRegexp(OverflowError, msg):
         with tm.assert_produces_warning(RuntimeWarning):
             nanops._checked_add_with_arr(np.array([m, m]),
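The helper exists because NumPy int64 addition wraps around silently; a short sketch of the contrast (the wrapped value shown is ordinary two's-complement behaviour):

    import numpy as np
    from pandas.core import nanops

    m = np.iinfo(np.int64).max
    arr = np.array([m])

    arr + 1
    # array([-9223372036854775808]) -- silent wraparound

    nanops._checked_add_with_arr(arr, 1)
    # raises OverflowError: Overflow in int64 addition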
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 4019bbe20ea1a..9a3505c3421e0 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2604,7 +2604,7 @@ def test_cat_on_filtered_index(self):
         self.assertEqual(str_multiple.loc[1], '2011 2 2')
 
     def test_str_cat_raises_intuitive_error(self):
-        # https://github.com/pydata/pandas/issues/11334
+        # https://github.com/pandas-dev/pandas/issues/11334
         s = Series(['a', 'b', 'c', 'd'])
         message = "Did you mean to supply a `sep` keyword?"
         with tm.assertRaisesRegexp(ValueError, message):
@@ -2661,7 +2661,7 @@ def test_index_str_accessor_visibility(self):
             idx.str
 
     def test_str_accessor_no_new_attributes(self):
-        # https://github.com/pydata/pandas/issues/10673
+        # https://github.com/pandas-dev/pandas/issues/10673
         s = Series(list('aabbcde'))
         with tm.assertRaisesRegexp(AttributeError,
                                    "You cannot add any new attribute"):
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index a8c43195f5552..ce7f8908d7506 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -1283,7 +1283,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
         argument, unless it is passed, in which case the values will be
         selected (see below). Any None objects will be dropped silently unless
         they are all None in which case a ValueError will be raised
-    axis : {0, 1, ...}, default 0
+    axis : {0/'index', 1/'columns'}, default 0
         The axis to concatenate along
     join : {'inner', 'outer'}, default 'outer'
         How to handle indexes on other axis(es)
@@ -1411,6 +1411,12 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
         sample = objs[0]
         self.objs = objs
 
+        # Standardize axis parameter to int
+        if isinstance(sample, Series):
+            axis = DataFrame()._get_axis_number(axis)
+        else:
+            axis = sample._get_axis_number(axis)
+
         # Need to flip BlockManager axis in the DataFrame special case
         self._is_frame = isinstance(sample, DataFrame)
         if self._is_frame:
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 7fd0b1044f9d7..d46dc4d355b4c 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -722,7 +722,7 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
         >>> from pandas import read_csv
         >>> from pandas.tools.plotting import parallel_coordinates
         >>> from matplotlib import pyplot as plt
-        >>> df = read_csv('https://raw.github.com/pydata/pandas/master'
+        >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master'
                           '/pandas/tests/data/iris.csv')
         >>> parallel_coordinates(df, 'Name', color=('#556270',
                                  '#4ECDC4', '#C7F464'))
@@ -2773,7 +2773,7 @@ def plot_group(keys, values, ax):
 
     if by is not None:
         # Prefer array return type for 2-D plots to match the subplot layout
-        # https://github.com/pydata/pandas/pull/12216#issuecomment-241175580
+        # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580
         result = _grouped_plot_by_column(plot_group, data, columns=columns,
                                          by=by, grid=grid, figsize=figsize,
                                          ax=ax, layout=layout,
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index f1a209053445a..d02c403cb3c66 100644
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -1281,7 +1281,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
     # error cause by resampling across multiple days when a one day period is
     # not a multiple of the frequency.
     #
-    # See https://github.com/pydata/pandas/issues/8683
+    # See https://github.com/pandas-dev/pandas/issues/8683
 
     first_tzinfo = first.tzinfo
     first = first.tz_localize(None)
diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py
index b3da62c8d2db5..1735ac4e2efa5 100644
--- a/pandas/tseries/tests/test_offsets.py
+++ b/pandas/tseries/tests/test_offsets.py
@@ -4606,7 +4606,7 @@ def test_parse_time_string(self):
             self.assertEqual(reso, reso_lower)
 
     def test_parse_time_quarter_w_dash(self):
-        # https://github.com/pydata/pandas/issue/9688
+        # https://github.com/pandas-dev/pandas/issues/9688
         pairs = [('1988-Q2', '1988Q2'), ('2Q-1988', '2Q1988'), ]
 
         for dashed, normal in pairs:
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index 204808dd510a0..9d3d27f3224b4 100644
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -1678,7 +1678,7 @@ def test_resample_anchored_multiday(self):
         # start date gets used to determine the offset.  Fixes issue where
         # a one day period is not a multiple of the frequency.
         #
-        # See: https://github.com/pydata/pandas/issues/8683
+        # See: https://github.com/pandas-dev/pandas/issues/8683
 
         index = pd.date_range(
             '2014-10-14 23:06:23.206', periods=3, freq='400L'
diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
index 38e210d698035..f0d14014d6559 100644
--- a/pandas/tseries/tests/test_timedeltas.py
+++ b/pandas/tseries/tests/test_timedeltas.py
@@ -1957,6 +1957,8 @@ def test_add_overflow(self):
             to_timedelta(106580, 'D') + Timestamp('2000')
         with tm.assertRaisesRegexp(OverflowError, msg):
             Timestamp('2000') + to_timedelta(106580, 'D')
+
+        msg = "Overflow in int64 addition"
         with tm.assertRaisesRegexp(OverflowError, msg):
             to_timedelta([106580], 'D') + Timestamp('2000')
         with tm.assertRaisesRegexp(OverflowError, msg):
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index ac48fcc2551ea..f640b3974b360 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -5514,22 +5514,6 @@ def test_second(self):
 
 class TestDaysInMonth(tm.TestCase):
 
-    def test_coerce_deprecation(self):
-
-        # deprecation of coerce
-        with tm.assert_produces_warning(FutureWarning):
-            to_datetime('2015-02-29', coerce=True)
-        with tm.assert_produces_warning(FutureWarning):
-            self.assertRaises(ValueError,
-                              lambda: to_datetime('2015-02-29', coerce=False))
-
-        # multiple arguments
-        for e, c in zip(['raise', 'ignore', 'coerce'], [True, False]):
-            with tm.assert_produces_warning(FutureWarning):
-                self.assertRaises(TypeError,
-                                  lambda: to_datetime('2015-02-29', errors=e,
-                                                      coerce=c))
-
     # tests for issue #10154
     def test_day_not_in_month_coerce(self):
         self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce')))
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index a85a606075911..00e8ee631f463 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -17,7 +17,8 @@
 from pytz import NonExistentTimeError
 
 import pandas.util.testing as tm
-from pandas.util.testing import assert_frame_equal, set_timezone
+from pandas.util.testing import (assert_frame_equal, assert_series_equal,
+                                 set_timezone)
 from pandas.compat import lrange, zip
 
 try:
@@ -535,6 +536,44 @@ def test_ambiguous_nat(self):
         # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')]
         self.assert_numpy_array_equal(di_test.values, localized.values)
 
+    def test_ambiguous_bool(self):
+        # make sure that we are correctly accepting bool values as ambiguous
+
+        # gh-14402
+        t = Timestamp('2015-11-01 01:00:03')
+        expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central')
+        expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central')
+
+        def f():
+            t.tz_localize('US/Central')
+        self.assertRaises(pytz.AmbiguousTimeError, f)
+
+        result = t.tz_localize('US/Central', ambiguous=True)
+        self.assertEqual(result, expected0)
+
+        result = t.tz_localize('US/Central', ambiguous=False)
+        self.assertEqual(result, expected1)
+
+        s = Series([t])
+        expected0 = Series([expected0])
+        expected1 = Series([expected1])
+
+        def f():
+            s.dt.tz_localize('US/Central')
+        self.assertRaises(pytz.AmbiguousTimeError, f)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=True)
+        assert_series_equal(result, expected0)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=[True])
+        assert_series_equal(result, expected0)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=False)
+        assert_series_equal(result, expected1)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=[False])
+        assert_series_equal(result, expected1)
+
     def test_nonexistent_raise_coerce(self):
         # See issue 13057
         from pytz.exceptions import NonExistentTimeError
@@ -629,14 +668,14 @@ def test_localized_at_time_between_time(self):
         result = ts_local.at_time(time(10, 0))
         expected = ts.at_time(time(10, 0)).tz_localize(self.tzstr(
             'US/Eastern'))
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
         self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern')))
 
         t1, t2 = time(10, 0), time(11, 0)
         result = ts_local.between_time(t1, t2)
         expected = ts.between_time(t1,
                                    t2).tz_localize(self.tzstr('US/Eastern'))
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
         self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern')))
 
     def test_string_index_alias_tz_aware(self):
@@ -723,7 +762,7 @@ def test_frame_no_datetime64_dtype(self):
         result = df.get_dtype_counts().sort_index()
         expected = Series({'datetime64[ns]': 2,
                            str(tz_expected): 2}).sort_index()
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
 
     def test_hongkong_tz_convert(self):
         # #1673
@@ -903,7 +942,7 @@ def test_utc_with_system_utc(self):
 
     def test_tz_convert_hour_overflow_dst(self):
         # Regression test for:
-        # https://github.com/pydata/pandas/issues/13306
+        # https://github.com/pandas-dev/pandas/issues/13306
 
         # sorted case US/Eastern -> UTC
         ts = ['2008-05-12 09:50:00',
@@ -943,7 +982,7 @@ def test_tz_convert_hour_overflow_dst(self):
 
     def test_tz_convert_hour_overflow_dst_timestamps(self):
         # Regression test for:
-        # https://github.com/pydata/pandas/issues/13306
+        # https://github.com/pandas-dev/pandas/issues/13306
 
         tz = self.tzstr('US/Eastern')
 
@@ -985,7 +1024,7 @@ def test_tz_convert_hour_overflow_dst_timestamps(self):
 
     def test_tslib_tz_convert_trans_pos_plus_1__bug(self):
         # Regression test for tslib.tz_convert(vals, tz1, tz2).
-        # See https://github.com/pydata/pandas/issues/4496 for details.
+        # See https://github.com/pandas-dev/pandas/issues/4496 for details.
         for freq, n in [('H', 1), ('T', 60), ('S', 3600)]:
             idx = date_range(datetime(2011, 3, 26, 23),
                              datetime(2011, 3, 27, 1), freq=freq)
@@ -1324,7 +1363,7 @@ def test_append_aware(self):
         exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
                                   tz='US/Eastern')
         exp = Series([1, 2], index=exp_index)
-        self.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
         self.assertEqual(ts_result.index.tz, rng1.tz)
 
         rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC')
@@ -1336,7 +1375,7 @@ def test_append_aware(self):
         exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
                                   tz='UTC')
         exp = Series([1, 2], index=exp_index)
-        self.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
         utc = rng1.tz
         self.assertEqual(utc, ts_result.index.tz)
@@ -1352,7 +1391,7 @@ def test_append_aware(self):
         exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'),
                            Timestamp('1/1/2011 02:00', tz='US/Central')])
         exp = Series([1, 2], index=exp_index)
-        self.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
 
     def test_append_dst(self):
         rng1 = date_range('1/1/2016 01:00', periods=3, freq='H',
@@ -1368,7 +1407,7 @@ def test_append_dst(self):
                                    '2016-08-01 02:00', '2016-08-01 03:00'],
                                   tz='US/Eastern')
         exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
-        tm.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
         self.assertEqual(ts_result.index.tz, rng1.tz)
 
     def test_append_aware_naive(self):
@@ -1429,7 +1468,7 @@ def test_arith_utc_convert(self):
         expected = uts1 + uts2
 
         self.assertEqual(result.index.tz, pytz.UTC)
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
 
     def test_intersection(self):
         rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py
index 2ca3fcea8005b..9bf39652a4e00 100644
--- a/pandas/tseries/timedeltas.py
+++ b/pandas/tseries/timedeltas.py
@@ -11,12 +11,9 @@
     is_timedelta64_dtype, is_list_like)
 from pandas.types.generic import ABCSeries, ABCIndexClass
-from pandas.util.decorators import deprecate_kwarg
 
 
-@deprecate_kwarg(old_arg_name='coerce', new_arg_name='errors',
-                 mapping={True: 'coerce', False: 'raise'})
-def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):
+def to_timedelta(arg, unit='ns', box=True, errors='raise'):
     """
     Convert argument to timedelta
 
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index 93d35ff964e69..637e70b76de98 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -16,7 +16,6 @@
 from pandas.types.missing import notnull
 
 import pandas.compat as compat
-from pandas.util.decorators import deprecate_kwarg
 
 _DATEUTIL_LEXER_SPLIT = None
 try:
@@ -175,10 +174,8 @@ def _guess_datetime_format_for_array(arr, **kwargs):
     return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
 
 
-@deprecate_kwarg(old_arg_name='coerce', new_arg_name='errors',
-                 mapping={True: 'coerce', False: 'raise'})
 def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
-                utc=None, box=True, format=None, exact=True, coerce=None,
+                utc=None, box=True, format=None, exact=True,
                 unit=None, infer_datetime_format=False):
     """
     Convert argument to datetime.
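With the deprecated coerce keyword now removed, callers spell the same intent through errors; a small before/after sketch (the invalid date is the one used in the tests above):

    import pandas as pd

    # previously: pd.to_datetime('2015-02-29', coerce=True)
    # now: invalid input becomes NaT instead of raising
    pd.to_datetime('2015-02-29', errors='coerce')  # NaT
    pd.to_timedelta('foo', errors='coerce')        # NaT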
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index 9073ad0abd535..bab45595cd60f 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -4155,6 +4155,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
     """
     cdef:
         ndarray[int64_t] trans, deltas, idx_shifted
+        ndarray ambiguous_array
         Py_ssize_t i, idx, pos, ntrans, n = len(vals)
         int64_t *tdata
         int64_t v, left, right
@@ -4190,11 +4191,18 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
             infer_dst = True
         elif ambiguous == 'NaT':
             fill = True
+        elif isinstance(ambiguous, bool):
+            is_dst = True
+            if ambiguous:
+                ambiguous_array = np.ones(len(vals), dtype=bool)
+            else:
+                ambiguous_array = np.zeros(len(vals), dtype=bool)
         elif hasattr(ambiguous, '__iter__'):
             is_dst = True
             if len(ambiguous) != len(vals):
                 raise ValueError(
                     "Length of ambiguous bool-array must be the same size as vals")
+            ambiguous_array = np.asarray(ambiguous)
 
     trans, deltas, typ = _get_dst_info(tz)
 
@@ -4286,7 +4294,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
             if infer_dst and dst_hours[i] != NPY_NAT:
                 result[i] = dst_hours[i]
             elif is_dst:
-                if ambiguous[i]:
+                if ambiguous_array[i]:
                     result[i] = left
                 else:
                     result[i] = right
diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py
index f00273bc75199..49273bacccf98 100755
--- a/scripts/find_undoc_args.py
+++ b/scripts/find_undoc_args.py
@@ -19,7 +19,7 @@
 parser.add_argument('-m', '--module', metavar='MODULE', type=str,required=True,
                     help='name of package to import and examine',action='store')
 parser.add_argument('-G', '--github_repo', metavar='REPO', type=str,required=False,
-                    help='github project where the the code lives, e.g. "pydata/pandas"',
+                    help='github project where the code lives, e.g. "pandas-dev/pandas"',
                     default=None,action='store')
 
 args = parser.parse_args()
diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py
index 02ba4f57c189d..7e4ffca59a0ab 100644
--- a/scripts/gen_release_notes.py
+++ b/scripts/gen_release_notes.py
@@ -46,7 +46,7 @@ def get_issues():
 
 def _get_page(page_number):
-    gh_url = ('https://api.github.com/repos/pydata/pandas/issues?'
+    gh_url = ('https://api.github.com/repos/pandas-dev/pandas/issues?'
               'milestone=*&state=closed&assignee=*&page=%d') % page_number
     with urlopen(gh_url) as resp:
         rs = resp.readlines()[0]
diff --git a/scripts/touchup_gh_issues.py b/scripts/touchup_gh_issues.py
index 96ee220f55a02..8aa6d426156f0 100755
--- a/scripts/touchup_gh_issues.py
+++ b/scripts/touchup_gh_issues.py
@@ -14,7 +14,7 @@
 pat = "((?:\s*GH\s*)?)#(\d{3,4})([^_]|$)?"
 rep_pat = r"\1GH\2_\3"
 
-anchor_pat = ".. _GH{id}: https://github.com/pydata/pandas/issues/{id}"
+anchor_pat = ".. _GH{id}: https://github.com/pandas-dev/pandas/issues/{id}"
 
 section_pat = "^pandas\s[\d\.]+\s*$"
 
diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py
index c14a1795f01e0..143d943b9eadf 100755
--- a/vb_suite/perf_HEAD.py
+++ b/vb_suite/perf_HEAD.py
@@ -192,7 +192,7 @@ def get_build_results(build):
     return convert_json_to_df(r_url)
 
 
-def get_all_results(repo_id=53976):  # travis pydata/pandas id
+def get_all_results(repo_id=53976):  # travis pandas-dev/pandas id
     """Fetches the VBENCH results for all travis builds, and returns a list
     of result df
 
     unsuccesful individual vbenches are dropped.
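The tslib change above is what lets a plain bool flow through tz_localize for ambiguous wall times; a short usage sketch mirroring the new tests (the timestamp is reused from them):

    import pandas as pd

    t = pd.Timestamp('2015-11-01 01:00:03')  # occurs twice in US/Central

    t.tz_localize('US/Central', ambiguous=True)   # 01:00:03-0500 (DST side)
    t.tz_localize('US/Central', ambiguous=False)  # 01:00:03-0600 (standard)
    t.tz_localize('US/Central')                   # raises AmbiguousTimeError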
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
index 70a6278c0852d..45053b6610896 100644
--- a/vb_suite/suite.py
+++ b/vb_suite/suite.py
@@ -67,7 +67,7 @@
     TMP_DIR = config.get('setup', 'tmp_dir')
 except:
     REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
-    REPO_URL = 'git@github.com:pydata/pandas.git'
+    REPO_URL = 'git@github.com:pandas-dev/pandas.git'
     DB_PATH = os.path.join(REPO_PATH, 'vb_suite/benchmarks.db')
     TMP_DIR = os.path.join(HOME, 'tmp/vb_pandas')
 
@@ -138,7 +138,7 @@ def generate_rst_files(benchmarks):
 
 The ``.pandas_vb_common`` setup script can be found here_
 
-.. _here: https://github.com/pydata/pandas/tree/master/vb_suite
+.. _here: https://github.com/pandas-dev/pandas/tree/master/vb_suite
 
 Produced on a machine with