Skip to content

Commit 321743f

Browse files
committed
2 parents 3d1ed79 + de46056 commit 321743f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+10233
-9188
lines changed

asv_bench/benchmarks/categoricals.py

+16
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,22 @@ def time_fastpath(self):
4646
Categorical(self.codes, self.cat_idx, fastpath=True)
4747

4848

49+
class categorical_constructor_with_datetimes(object):
50+
goal_time = 0.2
51+
52+
def setup(self):
53+
self.datetimes = pd.Series(pd.date_range(
54+
'1995-01-01 00:00:00', periods=10000, freq='s'))
55+
56+
def time_datetimes(self):
57+
Categorical(self.datetimes)
58+
59+
def time_datetimes_with_nat(self):
60+
t = self.datetimes
61+
t.iloc[-1] = pd.NaT
62+
Categorical(t)
63+
64+
4965
class categorical_rendering(object):
5066
goal_time = 3e-3
5167

asv_bench/benchmarks/timeseries.py

+12-18
Original file line numberDiff line numberDiff line change
@@ -1059,33 +1059,27 @@ class timeseries_to_datetime_iso8601(object):
10591059
goal_time = 0.2
10601060

10611061
def setup(self):
1062-
self.N = 100000
1063-
self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')
1064-
if hasattr(Series, 'convert'):
1065-
Series.resample = Series.convert
1066-
self.ts = Series(np.random.randn(self.N), index=self.rng)
10671062
self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
10681063
self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
1064+
self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng]
1065+
self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800'
1066+
for x in self.rng]
10691067

10701068
def time_timeseries_to_datetime_iso8601(self):
10711069
to_datetime(self.strings)
10721070

1073-
1074-
class timeseries_to_datetime_iso8601_format(object):
1075-
goal_time = 0.2
1076-
1077-
def setup(self):
1078-
self.N = 100000
1079-
self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')
1080-
if hasattr(Series, 'convert'):
1081-
Series.resample = Series.convert
1082-
self.ts = Series(np.random.randn(self.N), index=self.rng)
1083-
self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
1084-
self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
1071+
def time_timeseries_to_datetime_iso8601_nosep(self):
1072+
to_datetime(self.strings_nosep)
10851073

10861074
def time_timeseries_to_datetime_iso8601_format(self):
10871075
to_datetime(self.strings, format='%Y-%m-%d %H:%M:%S')
10881076

1077+
def time_timeseries_to_datetime_iso8601_format_no_sep(self):
1078+
to_datetime(self.strings_nosep, format='%Y%m%d %H:%M:%S')
1079+
1080+
def time_timeseries_to_datetime_iso8601_tz_spaceformat(self):
1081+
to_datetime(self.strings_tz_space)
1082+
10891083

10901084
class timeseries_with_format_no_exact(object):
10911085
goal_time = 0.2
@@ -1160,4 +1154,4 @@ def setup(self):
11601154
self.cdayh = pd.offsets.CustomBusinessDay(calendar=self.hcal)
11611155

11621156
def time_timeseries_year_incr(self):
1163-
(self.date + self.year)
1157+
(self.date + self.year)

ci/requirements-2.7_DOC_BUILD.run

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ipython=3.2.1
1+
ipython
22
nbconvert
33
matplotlib
44
scipy

conda.recipe/meta.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,13 @@ build:
66
number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }}
77

88
source:
9-
path: ../../
9+
path: ../
1010

1111
requirements:
1212
build:
1313
- python
1414
- cython
1515
- numpy x.x
16-
- libpython # [py2k and win]
1716
- setuptools
1817
- pytz
1918
- python-dateutil

doc/source/install.rst

+31-43
Original file line numberDiff line numberDiff line change
@@ -192,32 +192,31 @@ installed), make sure you have `nose
192192

193193
::
194194

195-
$ nosetests pandas
196-
..........................................................................
197-
.......................S..................................................
198-
..........................................................................
199-
..........................................................................
200-
..........................................................................
201-
..........................................................................
202-
..........................................................................
203-
..........................................................................
204-
..........................................................................
205-
..........................................................................
206-
.................S........................................................
207-
....
195+
>>> import pandas as pd
196+
>>> pd.test()
197+
Running unit tests for pandas
198+
pandas version 0.18.0
199+
numpy version 1.10.2
200+
pandas is installed in pandas
201+
Python version 2.7.11 |Continuum Analytics, Inc.|
202+
(default, Dec 6 2015, 18:57:58) [GCC 4.2.1 (Apple Inc. build 5577)]
203+
nose version 1.3.7
204+
..................................................................S......
205+
........S................................................................
206+
.........................................................................
207+
208208
----------------------------------------------------------------------
209-
Ran 818 tests in 21.631s
209+
Ran 9252 tests in 368.339s
210210

211-
OK (SKIP=2)
211+
OK (SKIP=117)
212212

213213
Dependencies
214214
------------
215215

216216
* `setuptools <http://pythonhosted.org/setuptools>`__
217217
* `NumPy <http://www.numpy.org>`__: 1.7.1 or higher
218-
* `python-dateutil <http://labix.org/python-dateutil>`__ 1.5 or higher
219-
* `pytz <http://pytz.sourceforge.net/>`__
220-
* Needed for time zone support
218+
* `python-dateutil <http://labix.org/python-dateutil>`__: 1.5 or higher
219+
* `pytz <http://pytz.sourceforge.net/>`__: Needed for time zone support
221220

222221
.. _install.recommended_dependencies:
223222

@@ -226,7 +225,7 @@ Recommended Dependencies
226225

227226
* `numexpr <https://github.com/pydata/numexpr>`__: for accelerating certain numerical operations.
228227
``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups.
229-
If installed, must be Version 2.1 or higher.
228+
If installed, must be Version 2.1 or higher. Version 2.4.6 or higher on Windows is highly recommended.
230229

231230
* `bottleneck <http://berkeleyanalytics.com/bottleneck>`__: for accelerating certain types of ``nan``
232231
evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups.
@@ -246,21 +245,16 @@ Optional Dependencies
246245
version. Version 0.19.1 or higher.
247246
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
248247
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
249-
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended.
250-
* Besides SQLAlchemy, you also need a database specific driver.
251-
Examples of such drivers are `psycopg2 <http://initd.org/psycopg/>`__ for PostgreSQL
252-
or `pymysql <https://github.com/PyMySQL/PyMySQL>`__ for MySQL. For
253-
`SQLite <https://docs.python.org/3.5/library/sqlite3.html>`__ this is
254-
included in Python's standard library by default.
255-
You can find an overview of supported drivers for each SQL dialect in the
256-
`SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__.
248+
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
249+
250+
- `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
251+
- `pymysql <https://github.com/PyMySQL/PyMySQL>`__: for MySQL.
252+
- `SQLite <https://docs.python.org/3.5/library/sqlite3.html>`__: for SQLite; this is included in Python's standard library by default.
253+
257254
* `matplotlib <http://matplotlib.sourceforge.net/>`__: for plotting
258-
* `statsmodels <http://statsmodels.sourceforge.net/>`__
259-
* Needed for parts of :mod:`pandas.stats`
260-
* `openpyxl <http://packages.python.org/openpyxl/>`__, `xlrd/xlwt <http://www.python-excel.org/>`__
261-
* Needed for Excel I/O
262-
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__
263-
* Alternative Excel writer
255+
* `statsmodels <http://statsmodels.sourceforge.net/>`__: Needed for parts of :mod:`pandas.stats`
256+
* `openpyxl <http://packages.python.org/openpyxl/>`__, `xlrd/xlwt <http://www.python-excel.org/>`__: Needed for Excel I/O
257+
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
264258
* `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
265259
* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3
266260
access.
@@ -271,16 +265,10 @@ Optional Dependencies
271265
<http://www.pygtk.org/>`__, `xsel
272266
<http://www.vergenet.net/~conrad/software/xsel/>`__, or `xclip
273267
<http://sourceforge.net/projects/xclip/>`__: necessary to use
274-
:func:`~pandas.io.clipboard.read_clipboard`. Most package managers on Linux
275-
distributions will have xclip and/or xsel immediately available for
276-
installation.
268+
:func:`~pandas.io.clipboard.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.
277269
* Google's `python-gflags <http://code.google.com/p/python-gflags/>`__
278-
and `google-api-python-client <http://github.com/google/google-api-python-client>`__
279-
* Needed for :mod:`~pandas.io.gbq`
280-
* `setuptools <https://pypi.python.org/pypi/setuptools/>`__
281-
* Needed for :mod:`~pandas.io.gbq` (specifically, it utilizes `pkg_resources`)
282-
* `httplib2 <http://pypi.python.org/pypi/httplib2>`__
283-
* Needed for :mod:`~pandas.io.gbq`
270+
and `google-api-python-client <http://github.com/google/google-api-python-client>`__: Needed for :mod:`~pandas.io.gbq`
271+
* `httplib2 <http://pypi.python.org/pypi/httplib2>`__: Needed for :mod:`~pandas.io.gbq`
284272
* One of the following combinations of libraries is needed to use the
285273
top-level :func:`~pandas.io.html.read_html` function:
286274

@@ -327,5 +315,5 @@ Optional Dependencies
327315

328316
Without the optional dependencies, many useful features will not
329317
work. Hence, it is highly recommended that you install these. A packaged
330-
distribution like `Enthought Canopy
318+
distribution like `Anaconda <http://docs.continuum.io/anaconda/>`__, or `Enthought Canopy
331319
<http://enthought.com/products/canopy>`__ may be worth considering.

doc/source/reshaping.rst

+21
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,27 @@ which level in the columns to stack:
228228
df2.stack('exp')
229229
df2.stack('animal')
230230
231+
Unstacking can result in missing values if subgroups do not have the same
232+
set of labels. By default, missing values will be replaced with the default
233+
fill value for that data type, ``NaN`` for float, ``NaT`` for datetimelike,
234+
etc. For integer types, by default data will be converted to float and missing
235+
values will be set to ``NaN``.
236+
237+
.. ipython:: python
238+
239+
df3 = df.iloc[[0, 1, 4, 7], [1, 2]]
240+
df3
241+
df3.unstack()
242+
243+
.. versionadded:: 0.18.0
244+
245+
Alternatively, unstack takes an optional ``fill_value`` argument, for specifying
246+
the value of missing data.
247+
248+
.. ipython:: python
249+
250+
df3.unstack(fill_value=-1e9)
251+
231252
With a MultiIndex
232253
~~~~~~~~~~~~~~~~~
233254

0 commit comments

Comments
 (0)