pandas-dev · gouthambs · Feb 19, 2014 · Feb 19, 2014 · Jan 30, 2014 · Feb 15, 2014
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+*~
 *.pyc
 *.pyo
 *.swp

diff --git a/.travis.yml b/.travis.yml
@@ -11,40 +11,65 @@ env:
   - secure: "PCzUFR8CHmw9lH84p4ygnojdF7Z8U5h7YfY0RyT+5K/aiQ1ZTU3ZkDTPI0/rR5FVMxsEEKEQKMcc5fvqW0PeD7Q2wRmluloKgT9w4EVEJ1ppKf7lITPcvZR2QgVOvjv4AfDtibLHFNiaSjzoqyJVjM4igjOu8WTlF3JfZcmOQjQ="
 
 matrix:
+    fast_finish: true
     include:
     - python: 2.6
       env:
       - NOSE_ARGS="not slow and not network and not disabled"
       - CLIPBOARD=xclip
       - LOCALE_OVERRIDE="it_IT.UTF-8"
-      - JOB_NAME: "26_nslow_nnet" # ScatterCI Build name, 20 chars max
+      - JOB_NAME: "26_nslow_nnet"
     - python: 2.7
       env:
       - NOSE_ARGS="slow and not network and not disabled"
       - LOCALE_OVERRIDE="zh_CN.GB18030"
       - FULL_DEPS=true
       - JOB_TAG=_LOCALE
-      - JOB_NAME: "27_slow_nnet_LOCALE" # ScatterCI Build name, 20 chars max
+      - JOB_NAME: "27_slow_nnet_LOCALE"
     - python: 2.7
       env:
       - NOSE_ARGS="not slow and not disabled"
       - FULL_DEPS=true
       - CLIPBOARD_GUI=gtk2
-      - JOB_NAME: "27_nslow" # ScatterCI Build name, 20 chars max
+      - JOB_NAME: "27_nslow"
       - DOC_BUILD=true # if rst files were changed, build docs in parallel with tests
     - python: 3.2
       env:
       - NOSE_ARGS="not slow and not disabled"
       - FULL_DEPS=true
       - CLIPBOARD_GUI=qt4
-      - JOB_NAME: "32_nslow"  # ScatterCI Build name, 20 chars max
+      - JOB_NAME: "32_nslow"
     - python: 3.3
       env:
       - NOSE_ARGS="not slow and not disabled"
       - FULL_DEPS=true
       - CLIPBOARD=xsel
-      - JOB_NAME: "33_nslow"  # ScatterCI Build name, 20 chars max
-
+      - JOB_NAME: "33_nslow"
+    - python: 2.7
+      env:
+      - NOSE_ARGS="not slow and not network and not disabled"
+      - JOB_NAME: "27_numpy_master"
+      - JOB_TAG=_NUMPY_DEV_master
+      - NUMPY_BUILD=master
+    - python: 2.7
+      env:
+      - NOSE_ARGS="not slow and not network and not disabled"
+      - JOB_NAME: "27_numpy_1.8.x"
+      - JOB_TAG=_NUMPY_DEV_1_8_x
+      - NUMPY_BUILD=maintenance/1.8.x
+    allow_failures:
+      - python: 2.7
+        env:
+        - NOSE_ARGS="not slow and not network and not disabled"
+        - JOB_NAME: "27_numpy_master"
+        - JOB_TAG=_NUMPY_DEV_master
+        - NUMPY_BUILD=master
+      - python: 2.7
+        env:
+        - NOSE_ARGS="not slow and not network and not disabled"
+        - JOB_NAME: "27_numpy_1.8.x"
+        - JOB_TAG=_NUMPY_DEV_1_8_x
+        - NUMPY_BUILD=maintenance/1.8.x
 
 # allow importing from site-packages,
 # so apt-get python-x works for system pythons

diff --git a/ci/install.sh b/ci/install.sh
@@ -31,28 +31,47 @@ edit_init
 python_major_version="${TRAVIS_PYTHON_VERSION:0:1}"
 [ "$python_major_version" == "2" ] && python_major_version=""
 
-pip install -I -U setuptools
-pip install wheel
+# pin pip, setuptools and wheel to fixed versions
+pip install -I pip==1.5.1
+pip install -I setuptools==2.2
+pip install wheel==0.22
 
 # comment this line to disable the fetching of wheel files
-base_url=http://cache27diy-cpycloud.rhcloud.com
+base_url=http://pandas.pydata.org/pandas-build/dev/wheels
+
 wheel_box=${TRAVIS_PYTHON_VERSION}${JOB_TAG}
 PIP_ARGS+=" -I --use-wheel --find-links=$base_url/$wheel_box/ --allow-external --allow-insecure"
 
-# Force virtualenv to accept system_site_packages
-rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt
-
-
 if [ -n "$LOCALE_OVERRIDE" ]; then
     # make sure the locale is available
     # probably useless, since you would need to relogin
     time sudo locale-gen "$LOCALE_OVERRIDE"
 fi
 
-
 # we need these for numpy
 time sudo apt-get $APT_ARGS install libatlas-base-dev gfortran
 
+if [ -n "$NUMPY_BUILD" ]; then
+    # building numpy
+    curdir=$(pwd)
+    echo "building numpy: $curdir"
+
+    # remove the system installed numpy
+    pip uninstall numpy -y
+
+    # clone & install
+    git clone --branch $NUMPY_BUILD https://github.com/numpy/numpy.git numpy
+    cd numpy
+    time sudo python setup.py install
+
+    cd $curdir
+    numpy_version=$(python -c 'import numpy; print(numpy.__version__)')
+    echo "numpy: $numpy_version"
+else
+    # Force virtualenv to accept system_site_packages
+    rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt
+fi
+
 time pip install $PIP_ARGS -r ci/requirements-${wheel_box}.txt
 
 
@@ -98,6 +117,10 @@ export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH
 which gcc
 ccache -z
 time pip install $(find dist | grep gz | head -n 1)
-# restore cython
-time pip install $PIP_ARGS  $(cat ci/requirements-${wheel_box}.txt | grep -i cython)
+
+# restore cython (if not numpy building)
+if [ -z "$NUMPY_BUILD" ]; then
+    time pip install $PIP_ARGS  $(cat ci/requirements-${wheel_box}.txt | grep -i cython)
+fi
+
 true
diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
@@ -13,7 +13,6 @@ xlrd==0.9.2
 patsy==0.1.0
 html5lib==1.0b2
 lxml==3.2.1
-scikits.timeseries==0.91.3
 scipy==0.10.0
 beautifulsoup4==4.2.1
 statsmodels==0.5.0

diff --git a/ci/requirements-2.7_NUMPY_DEV_1_8_x.txt b/ci/requirements-2.7_NUMPY_DEV_1_8_x.txt
@@ -0,0 +1,3 @@
+python-dateutil
+pytz==2013b
+cython==0.19.1
diff --git a/ci/requirements-2.7_NUMPY_DEV_master.txt b/ci/requirements-2.7_NUMPY_DEV_master.txt
@@ -0,0 +1,3 @@
+python-dateutil
+pytz
+cython==0.19.1
diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt
@@ -8,6 +8,7 @@ numpy==1.8.0
 cython==0.19.1
 numexpr==2.3
 tables==3.1.0
+bottleneck==0.8.0
 matplotlib==1.2.1
 patsy==0.1.0
 lxml==3.2.1

diff --git a/ci/speedpack/build.sh b/ci/speedpack/build.sh
@@ -103,6 +103,12 @@ function generate_wheels() {
 }
 
 
+# To generate wheels for a single requirements file only, call e.g.:
+# generate_wheels "/reqf/requirements-3.3.txt"
+#
+# If vagrant is already up,
+# run this via `vagrant provision`.
+
 for reqfile in $(ls -1 /reqf/requirements-*.*); do
     generate_wheels "$reqfile"
 done
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -424,10 +424,25 @@ Time series-related
    Series.shift
    Series.first_valid_index
    Series.last_valid_index
-   Series.weekday
    Series.resample
    Series.tz_convert
    Series.tz_localize
+   Series.year
+   Series.month
+   Series.day
+   Series.hour
+   Series.minute
+   Series.second
+   Series.microsecond
+   Series.nanosecond
+   Series.date
+   Series.time
+   Series.dayofyear
+   Series.weekofyear
+   Series.week
+   Series.dayofweek
+   Series.weekday
+   Series.quarter
 
 String handling
 ~~~~~~~~~~~~~~~~~~~
@@ -1129,7 +1144,9 @@ Time/Date Components
    DatetimeIndex.dayofweek
    DatetimeIndex.weekday
    DatetimeIndex.quarter
-
+   DatetimeIndex.tz
+   DatetimeIndex.freq
+   DatetimeIndex.freqstr
 
 Selecting
 ~~~~~~~~~
@@ -1159,7 +1176,7 @@ Conversion
    DatetimeIndex.to_datetime
    DatetimeIndex.to_period
    DatetimeIndex.to_pydatetime
-
+   DatetimeIndex.to_series
 
 GroupBy
 -------

diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst
@@ -30,6 +30,43 @@ R packages.
 Base R
 ------
 
+Slicing with R's |c|_
+~~~~~~~~~~~~~~~~~~~~~
+
+R makes it easy to access ``data.frame`` columns by name
+
+.. code-block:: r
+
+   df <- data.frame(a=rnorm(5), b=rnorm(5), c=rnorm(5), d=rnorm(5), e=rnorm(5))
+   df[, c("a", "c", "e")]
+
+or by integer location
+
+.. code-block:: r
+
+   df <- data.frame(matrix(rnorm(1000), ncol=100))
+   df[, c(1:10, 25:30, 40, 50:100)]
+
+Selecting multiple columns by name in ``pandas`` is straightforward
+
+.. ipython:: python
+
+   df = DataFrame(np.random.randn(10, 3), columns=list('abc'))
+   df[['a', 'c']]
+   df.loc[:, ['a', 'c']]
+
+Selecting multiple noncontiguous columns by integer location can be achieved
+with a combination of the ``iloc`` indexer attribute and ``numpy.r_``.
+
+.. ipython:: python
+
+   named = list('abcdefg')
+   n = 30
+   columns = named + np.arange(len(named), n).tolist()
+   df = DataFrame(np.random.randn(n, n), columns=columns)
+
+   df.iloc[:, np.r_[:10, 24:30]]
+
 |aggregate|_
 ~~~~~~~~~~~~
 
@@ -407,6 +444,9 @@ The second approach is to use the :meth:`~pandas.DataFrame.groupby` method:
 For more details and examples see :ref:`the reshaping documentation
 <reshaping.pivot>` or :ref:`the groupby documentation<groupby.split>`.
 
+.. |c| replace:: ``c``
+.. _c: http://stat.ethz.ch/R-manual/R-patched/library/base/html/c.html
+
 .. |aggregate| replace:: ``aggregate``
 .. _aggregate: http://finzi.psych.upenn.edu/R/library/stats/html/aggregate.html
 

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -40,8 +40,8 @@
               'sphinx.ext.extlinks',
               'sphinx.ext.todo',
               'numpydoc', # used to parse numpy-style docstrings for autodoc
-              'ipython_directive',
-              'ipython_console_highlighting',
+              'ipython_sphinxext.ipython_directive',
+              'ipython_sphinxext.ipython_console_highlighting',
               'sphinx.ext.intersphinx',
               'sphinx.ext.todo',
               'sphinx.ext.coverage',

diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
@@ -250,6 +250,9 @@ Turn a matrix with hours in columns and days in rows into a continuous row seque
 `How to rearrange a python pandas DataFrame?
 <http://stackoverflow.com/questions/15432659/how-to-rearrange-a-python-pandas-dataframe>`__
 
+`Dealing with duplicates when reindexing a timeseries to a specified frequency
+<http://stackoverflow.com/questions/22244383/pandas-df-refill-adding-two-columns-of-different-shape>`__
+
 .. _cookbook.resample:
 
 Resampling
@@ -470,6 +473,75 @@ Storing Attributes to a group node
     store.close()
     os.remove('test.h5')
 
+
+.. _cookbook.binary:
+
+Binary Files
+~~~~~~~~~~~~
+
+pandas readily accepts NumPy record arrays, which is useful if you need to
+read in a binary file consisting of an array of C structs. For example, given
+this C program in a file called ``main.c`` compiled with
+``gcc main.c -std=gnu99`` on a 64-bit machine,
+
+.. code-block:: c
+
+   #include <stdio.h>
+   #include <stdint.h>
+
+   typedef struct _Data
+   {
+       int32_t count;
+       double avg;
+       float scale;
+   } Data;
+
+   int main(int argc, const char *argv[])
+   {
+       size_t n = 10;
+       Data d[n];
+
+       for (int i = 0; i < n; ++i)
+       {
+           d[i].count = i;
+           d[i].avg = i + 1.0;
+           d[i].scale = (float) i + 2.0f;
+       }
+
+       FILE *file = fopen("binary.dat", "wb");
+       fwrite(&d, sizeof(Data), n, file);
+       fclose(file);
+
+       return 0;
+   }
+
+the following Python code will read the binary file ``'binary.dat'`` into a
+pandas ``DataFrame``, where each element of the struct corresponds to a column
+in the frame:
+
+.. code-block:: python
+
+   import numpy as np
+   from pandas import DataFrame
+
+   names = 'count', 'avg', 'scale'
+
+   # note that the offsets are larger than the size of the type because of
+   # struct padding
+   offsets = 0, 8, 16
+   formats = 'i4', 'f8', 'f4'
+   dt = np.dtype({'names': names, 'offsets': offsets, 'formats': formats},
+                 align=True)
+   df = DataFrame(np.fromfile('binary.dat', dt))
+
+.. note::
+
+   The offsets of the structure elements may be different depending on the
+   architecture of the machine on which the file was created. Using a raw
+   binary file format like this for general data storage is not recommended, as
+   it is not cross-platform. We recommend either HDF5 or msgpack, both of
+   which are supported by pandas' IO facilities.
+
 Computation
 -----------
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    *~
     *.pyc
     *.pyo
     *.swp
@@ Expand Down @@