Commit 51a9d4a: Merge master
2 parents 95a9503 + f5cc078
175 files changed: +2078 -1363 lines changed


.github/CONTRIBUTING.md (+1 -1)

@@ -2,7 +2,7 @@
 
 Whether you are a novice or experienced software developer, all contributions and suggestions are welcome!
 
-Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
+Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/development/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
 
 ## Getting Started
 
.travis.yml (+8 -0)

@@ -86,6 +86,14 @@ install:
   - ci/submit_cython_cache.sh
   - echo "install done"
 
+before_script:
+  # display server (for clipboard functionality) needs to be started here,
+  # does not work if done in install:setup_env.sh (GH-26103)
+  - export DISPLAY=":99.0"
+  - echo "sh -e /etc/init.d/xvfb start"
+  - sh -e /etc/init.d/xvfb start
+  - sleep 3
+
script:
  - echo "script start"
  - source activate pandas-dev
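Note: the new `before_script` block exists because the pandas clipboard tests talk to a real X clipboard, which only works once a display server is up (GH-26103). As a hedged illustration only (not part of the commit), the sketch below shows the behavior the display server enables; the availability of a Linux clipboard backend such as xclip or xsel is an assumption.

```python
# Minimal sketch, assuming an X display (e.g. the Xvfb one started in CI)
# and a clipboard backend are available on the machine.
import os
import pandas as pd

os.environ.setdefault("DISPLAY", ":99.0")  # matches the Travis setting above

df = pd.DataFrame({"a": [1, 2, 3]})
df.to_clipboard(index=False)   # expected to fail without a display server
print(pd.read_clipboard())     # round-trips the frame through the clipboard
```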

asv_bench/benchmarks/io/csv.py (+56 -2)

@@ -3,7 +3,7 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import DataFrame, Categorical, date_range, read_csv
+from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime
 from pandas.io.parsers import _parser_defaults
 from io import StringIO
 
@@ -96,6 +96,35 @@ def time_read_csv(self, infer_datetime_format, format):
                  infer_datetime_format=infer_datetime_format)
 
 
+class ReadCSVConcatDatetime(StringIORewind):
+
+    iso8601 = '%Y-%m-%d %H:%M:%S'
+
+    def setup(self):
+        rng = date_range('1/1/2000', periods=50000, freq='S')
+        self.StringIO_input = StringIO('\n'.join(
+            rng.strftime(self.iso8601).tolist()))
+
+    def time_read_csv(self):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
+class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
+
+    params = (['nan', '0', ''],)
+    param_names = ['bad_date_value']
+
+    def setup(self, bad_date_value):
+        self.StringIO_input = StringIO(('%s,\n' % bad_date_value) * 50000)
+
+    def time_read_csv(self, bad_date_value):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo', 'bar'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
 class ReadCSVSkipRows(BaseIO):
 
     fname = '__test__.csv'
@@ -273,7 +302,7 @@ def mem_parser_chunks(self):
 
 class ReadCSVParseSpecialDate(StringIORewind):
     params = (['mY', 'mdY', 'hm'],)
-    params_name = ['value']
+    param_names = ['value']
     objects = {
         'mY': '01-2019\n10-2019\n02/2000\n',
         'mdY': '12/02/2010\n',
@@ -290,4 +319,29 @@ def time_read_special_date(self, value):
                  names=['Date'], parse_dates=['Date'])
 
 
+class ParseDateComparison(StringIORewind):
+    params = ([False, True],)
+    param_names = ['cache_dates']
+
+    def setup(self, cache_dates):
+        count_elem = 10000
+        data = '12-02-2010\n' * count_elem
+        self.StringIO_input = StringIO(data)
+
+    def time_read_csv_dayfirst(self, cache_dates):
+        read_csv(self.data(self.StringIO_input), sep=',', header=None,
+                 names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
+                 dayfirst=True)
+
+    def time_to_datetime_dayfirst(self, cache_dates):
+        df = read_csv(self.data(self.StringIO_input),
+                      dtype={'date': str}, names=['date'])
+        to_datetime(df['date'], cache=cache_dates, dayfirst=True)
+
+    def time_to_datetime_format_DD_MM_YYYY(self, cache_dates):
+        df = read_csv(self.data(self.StringIO_input),
+                      dtype={'date': str}, names=['date'])
+        to_datetime(df['date'], cache=cache_dates, format='%d-%m-%Y')
+
+
 from ..pandas_vb_common import setup  # noqa: F401
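Note: these new classes time `read_csv` date parsing on clean ISO-8601 input, on bad date values, and with/without the date cache. A standalone sketch of the call pattern they measure, with sizes shrunk (not part of the commit):

```python
# Minimal sketch of the parse pattern timed by ReadCSVConcatDatetime,
# using 100 rows instead of the benchmark's 50000.
from io import StringIO
from pandas import date_range, read_csv

rng = date_range('1/1/2000', periods=100, freq='S')
data = StringIO('\n'.join(rng.strftime('%Y-%m-%d %H:%M:%S').tolist()))
df = read_csv(data, header=None, names=['foo'], parse_dates=['foo'],
              infer_datetime_format=False)
print(df.dtypes)  # 'foo' should parse to datetime64[ns]
```

These classes are collected by airspeed velocity (asv); a typical local invocation during review might look like `asv continuous -f 1.1 upstream/master HEAD -b io.csv` run from `asv_bench/` (the exact flags are an assumption, not part of the commit).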

asv_bench/benchmarks/io/parsers.py (+34 -0)

@@ -0,0 +1,34 @@
+import numpy as np
+
+from pandas._libs.tslibs.parsing import (
+    _concat_date_cols, _does_string_look_like_datetime)
+
+
+class DoesStringLookLikeDatetime(object):
+
+    params = (['2Q2005', '0.0', '10000'],)
+    param_names = ['value']
+
+    def setup(self, value):
+        self.objects = [value] * 1000000
+
+    def time_check_datetimes(self, value):
+        for obj in self.objects:
+            _does_string_look_like_datetime(obj)
+
+
+class ConcatDateCols(object):
+
+    params = ([1234567890, 'AAAA'], [1, 2])
+    param_names = ['value', 'dim']
+
+    def setup(self, value, dim):
+        count_elem = 10000
+        if dim == 1:
+            self.object = (np.array([value] * count_elem),)
+        if dim == 2:
+            self.object = (np.array([value] * count_elem),
+                           np.array([value] * count_elem))
+
+    def time_check_concat(self, value, dim):
+        _concat_date_cols(self.object)
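Note: this new file benchmarks the private parsing helpers behind column-wise date handling in `read_csv`. A hedged sketch of what they do; since this is private API, the exact return values are assumptions inferred from how this commit uses them:

```python
# Minimal sketch, assuming the private helpers behave as their
# benchmark usage above suggests.
import numpy as np
from pandas._libs.tslibs.parsing import (
    _concat_date_cols, _does_string_look_like_datetime)

# Heuristic the parser uses to decide whether a token is worth
# handing to the datetime parser at all.
print(_does_string_look_like_datetime('2Q2005'))  # expected: True
print(_does_string_look_like_datetime('0.0'))     # expected: False

# Joins per-column date parts into single strings before parsing,
# e.g. separate year/month columns from parse_dates=[[0, 1]].
parts = (np.array(['2019', '2020']), np.array(['01', '02']))
print(_concat_date_cols(parts))  # expected: array of joined date strings
```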

ci/azure/windows.yml (+7 -8)

@@ -17,16 +17,15 @@ jobs:
       CONDA_PY: "37"
 
     steps:
-    - task: CondaEnvironment@1
-      inputs:
-        updateConda: no
-        packageSpecs: ''
-
-    - script: |
-        ci\\incremental\\setup_conda_environment.cmd
-      displayName: 'Before Install'
+    - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
+      displayName: Add conda to PATH
+    - script: conda update -q -n base conda
+      displayName: Update conda
+    - script: conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
+      displayName: Create anaconda environment
     - script: |
         call activate pandas-dev
+        call conda list
         ci\\incremental\\build.cmd
       displayName: 'Build'
     - script: |

ci/code_checks.sh (+4 -9)

@@ -169,15 +169,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    # Check that we use pytest.raises only as a context manager
-    #
-    # For any flake8-compliant code, the only way this regex gets
-    # matched is if there is no "with" statement preceding "pytest.raises"
-    MSG='Check for pytest.raises as context manager (a line starting with `pytest.raises` is invalid, needs a `with` to precede it)' ; echo $MSG
-    MSG='TODO: This check is currently skipped because so many files fail this. Please enable when all are corrected (xref gh-24332)' ; echo $MSG
-    # invgrep -R --include '*.py' -E '[[:space:]] pytest.raises' pandas/tests
-    # RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo $MSG
     invgrep -R --include="*.rst" ".. code-block ::" doc/source
     RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -239,6 +230,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests datetimes.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/tools/datetimes.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Doctests top-level reshaping functions' ; echo $MSG
     pytest -q --doctest-modules \
         pandas/core/reshape/concat.py \
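Note: the new block wires `pandas/core/tools/datetimes.py` into the doctest run, so its docstring examples must now pass under `pytest --doctest-modules` (locally, `./ci/code_checks.sh doctests` runs just this section, per the `$CHECK` guard above). As a hedged illustration, the public entry point living in that module is `pandas.to_datetime`; the exact docstring content is not quoted from the commit:

```python
# The kind of call whose docstring examples the new check exercises.
import pandas as pd

print(pd.to_datetime('2019-04-25 10:00'))           # a Timestamp
print(pd.to_datetime(['2019-04-25', 'not a date'],
                     errors='coerce'))              # NaT for bad values
```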

ci/incremental/setup_conda_environment.cmd (-21)

This file was deleted.

ci/run_with_env.cmd (-95)

This file was deleted.

ci/setup_env.sh (-6)

@@ -118,16 +118,10 @@ echo "conda list"
 conda list
 
 # Install DB for Linux
-export DISPLAY=":99."
 if [ ${TRAVIS_OS_NAME} == "linux" ]; then
     echo "installing dbs"
     mysql -e 'create database pandas_nosetest;'
     psql -c 'create database pandas_nosetest;' -U postgres
-
-    echo
-    echo "sh -e /etc/init.d/xvfb start"
-    sh -e /etc/init.d/xvfb start
-    sleep 3
 else
     echo "not using dbs on non-linux"
 fi

doc/source/ecosystem.rst (+5 -0)

@@ -285,6 +285,11 @@ provides a familiar ``DataFrame`` interface for out-of-core, parallel and distri
 
 Dask-ML enables parallel and distributed machine learning using Dask alongside existing machine learning libraries like Scikit-Learn, XGBoost, and TensorFlow.
 
+`Koalas <https://koalas.readthedocs.io/en/latest/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache Spark. It enables users to leverage multi-cores on one machine or a cluster of machines to speed up or scale their DataFrame code.
+
 `Odo <http://odo.pydata.org>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
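Note: since the new ecosystem entry describes a pandas-style API on Spark, here is a brief hedged sketch of Koalas usage from that era; the `databricks.koalas` import path and the need for a working PySpark runtime are assumptions about the external library, not something this commit specifies:

```python
# Minimal sketch, assuming `pip install koalas` and a local Spark runtime.
import databricks.koalas as ks

kdf = ks.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'b']})
print(kdf.groupby('y')['x'].sum())  # familiar pandas syntax, Spark execution
```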

doc/source/reference/frame.rst (+23 -0)

@@ -311,6 +311,29 @@ specific plotting methods of the form ``DataFrame.plot.<kind>``.
    DataFrame.boxplot
    DataFrame.hist
 
+
+.. _api.frame.sparse:
+
+Sparse Accessor
+~~~~~~~~~~~~~~~
+
+Sparse-dtype specific methods and attributes are provided under the
+``DataFrame.sparse`` accessor.
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/accessor_attribute.rst
+
+   DataFrame.sparse.density
+
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.sparse.from_spmatrix
+   DataFrame.sparse.to_coo
+   DataFrame.sparse.to_dense
+
+
 Serialization / IO / Conversion
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
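Note: this hunk documents a new accessor, so a hedged usage sketch may help; it assumes the pandas 0.25-era sparse API that these autosummary entries point at, plus an installed scipy:

```python
# Minimal sketch of the documented DataFrame.sparse accessor.
import pandas as pd
from scipy import sparse

mat = sparse.eye(3, format='csr')                 # 3x3 sparse identity
df = pd.DataFrame.sparse.from_spmatrix(mat, columns=['a', 'b', 'c'])

print(df.sparse.density)     # fraction of non-fill values (here 1/3)
print(df.sparse.to_coo())    # back to a scipy COO matrix
print(df.sparse.to_dense())  # ordinary dense DataFrame
```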

doc/source/user_guide/computation.rst (+6 -5)

@@ -865,7 +865,7 @@ which is equivalent to using weights
 
 The difference between the above two variants arises because we are
 dealing with series which have finite history. Consider a series of infinite
-history:
+history, with ``adjust=True``:
 
 .. math::
 
@@ -884,10 +884,11 @@ and a ratio of :math:`1 - \alpha` we have
     &= \alpha x_t + (1 - \alpha)[x_{t-1} + (1 - \alpha) x_{t-2} + ...]\alpha\\
     &= \alpha x_t + (1 - \alpha) y_{t-1}
 
-which shows the equivalence of the above two variants for infinite series.
-When ``adjust=True`` we have :math:`y_0 = x_0` and from the last
-representation above we have :math:`y_t = \alpha x_t + (1 - \alpha) y_{t-1}`,
-therefore there is an assumption that :math:`x_0` is not an ordinary value
+which is the same expression as ``adjust=False`` above and therefore
+shows the equivalence of the two variants for infinite series.
+When ``adjust=False``, we have :math:`y_0 = x_0` and
+:math:`y_t = \alpha x_t + (1 - \alpha) y_{t-1}`.
+Therefore, there is an assumption that :math:`x_0` is not an ordinary value
 but rather an exponentially weighted moment of the infinite series up to that
 point.
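Note: a quick numeric check of the equivalence this hunk clarifies, using ordinary pandas API (not part of the commit). For a finite series the two variants differ most at the start, where ``adjust=False`` treats :math:`y_0 = x_0` as an exponentially weighted moment of an assumed infinite history, and they converge as t grows:

```python
# adjust=True uses the finite weighted average; adjust=False uses the
# recursion y_t = alpha*x_t + (1 - alpha)*y_{t-1} with y_0 = x_0.
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.ewm(alpha=0.5, adjust=True).mean())
print(s.ewm(alpha=0.5, adjust=False).mean())
# Outputs differ at early indices and approach each other over time.
```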
