From cb7c6ae2d8345cfd036e7deb9d76e25795e4ab9d Mon Sep 17 00:00:00 2001 From: Kieran O'Mahony Date: Fri, 11 May 2012 16:34:10 +0100 Subject: [PATCH 001/114] ENH: Add JSON export option for DataFrame #631 Bundle custom ujson lib for DataFrame and Series JSON export & import. --- pandas/core/frame.py | 87 ++ pandas/core/series.py | 71 ++ pandas/src/ujson/lib/ultrajson.h | 301 ++++++ pandas/src/ujson/lib/ultrajsondec.c | 837 +++++++++++++++ pandas/src/ujson/lib/ultrajsonenc.c | 858 +++++++++++++++ pandas/src/ujson/python/JSONtoObj.c | 650 +++++++++++ pandas/src/ujson/python/objToJSON.c | 1554 +++++++++++++++++++++++++++ pandas/src/ujson/python/ujson.c | 41 + pandas/src/ujson/python/version.h | 1 + pandas/tests/test_frame.py | 137 +++ pandas/tests/test_series.py | 56 +- pandas/tests/test_ujson.py | 1096 +++++++++++++++++++ setup.py | 15 +- 13 files changed, 5702 insertions(+), 2 deletions(-) create mode 100644 pandas/src/ujson/lib/ultrajson.h create mode 100644 pandas/src/ujson/lib/ultrajsondec.c create mode 100644 pandas/src/ujson/lib/ultrajsonenc.c create mode 100644 pandas/src/ujson/python/JSONtoObj.c create mode 100644 pandas/src/ujson/python/objToJSON.c create mode 100644 pandas/src/ujson/python/ujson.c create mode 100644 pandas/src/ujson/python/version.h create mode 100644 pandas/tests/test_ujson.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9b9e0c62d4730..36202948e9a78 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -679,6 +679,93 @@ def to_dict(self): """ return dict((k, v.to_dict()) for k, v in self.iteritems()) + @classmethod + def from_json(cls, json, orient="columns", dtype=None, numpy=True): + """ + Convert JSON string to DataFrame + + Parameters + ---------- + json : The JSON string to parse. + orient : {'split', 'records', 'index', 'columns', 'values'}, + default 'columns' + The format of the JSON string + split : dict like + {index -> [index], columns -> [columns], data -> [values]} + records : list like [{column -> value}, ... , {column -> value}] + index : dict like {index -> {column -> value}} + columns : dict like {column -> {index -> value}} + values : just the values array + dtype : dtype of the resulting DataFrame + nupmpy: direct decoding to numpy arrays. default True but falls back + to standard decoding if a problem occurs. + + Returns + ------- + result : DataFrame + """ + from pandas._ujson import loads + df = None + + if numpy: + try: + if orient == "columns": + args = loads(json, dtype=dtype, numpy=True, labelled=True) + if args: + args = (args[0].T, args[2], args[1]) + df = DataFrame(*args) + elif orient == "split": + df = DataFrame(**loads(json, dtype=dtype, numpy=True)) + elif orient == "values": + df = DataFrame(loads(json, dtype=dtype, numpy=True)) + else: + df = DataFrame(*loads(json, dtype=dtype, numpy=True, + labelled=True)) + except ValueError: + numpy = False + if not numpy: + if orient == "columns": + df = DataFrame(loads(json), dtype=dtype) + elif orient == "split": + df = DataFrame(dtype=dtype, **loads(json)) + elif orient == "index": + df = DataFrame(loads(json), dtype=dtype).T + else: + df = DataFrame(loads(json), dtype=dtype) + + return df + + def to_json(self, orient="columns", double_precision=10, + force_ascii=True): + """ + Convert DataFrame to a JSON string. + + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. 
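+
+        Example (illustrative sketch only; the exact key ordering of the
+        encoded string is not guaranteed, and ``from_json`` accepts the
+        same string back):
+
+        >>> df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
+        >>> df.to_json(orient='columns')
+        '{"a":{"0":1,"1":3},"b":{"0":2,"1":4}}'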
+ + Parameters + ---------- + orient : {'split', 'records', 'index', 'columns', 'values'}, + default 'columns' + The format of the JSON string + split : dict like + {index -> [index], columns -> [columns], data -> [values]} + records : list like [{column -> value}, ... , {column -> value}] + index : dict like {index -> {column -> value}} + columns : dict like {column -> {index -> value}} + values : just the values array + double_precision : The number of decimal places to use when encoding + floating point values, default 10. + force_ascii : force encoded string to be ASCII, default True. + + Returns + ------- + result : JSON compatible string + """ + from pandas._ujson import dumps + return dumps(self, orient=orient, double_precision=double_precision, + ensure_ascii=force_ascii) + @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, names=None, coerce_float=False): diff --git a/pandas/core/series.py b/pandas/core/series.py index c0de6aa21826d..0ca78e3d2236e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -911,6 +911,77 @@ def to_dict(self): """ return dict(self.iteritems()) + @classmethod + def from_json(cls, json, orient="index", dtype=None, numpy=True): + """ + Convert JSON string to Series + + Parameters + ---------- + json : The JSON string to parse. + orient : {'split', 'records', 'index'}, default 'index' + The format of the JSON string + split : dict like + {index -> [index], name -> name, data -> [values]} + records : list like [value, ... , value] + index : dict like {index -> value} + dtype : dtype of the resulting Series + nupmpy: direct decoding to numpy arrays. default True but falls back + to standard decoding if a problem occurs. + + Returns + ------- + result : Series + """ + from pandas._ujson import loads + s = None + + if numpy: + try: + if orient == "split": + s = Series(**loads(json, dtype=dtype, numpy=True)) + elif orient == "columns" or orient == "index": + s = Series(*loads(json, dtype=dtype, numpy=True, + labelled=True)) + else: + s = Series(loads(json, dtype=dtype, numpy=True)) + except ValueError: + numpy = False + if not numpy: + if orient == "split": + s = Series(dtype=dtype, **loads(json)) + else: + s = Series(loads(json), dtype=dtype) + + return s + + def to_json(self, orient="index", double_precision=10, force_ascii=True): + """ + Convert Series to a JSON string + + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. + + Parameters + ---------- + orient : {'split', 'records', 'index'}, default 'index' + The format of the JSON string + split : dict like + {index -> [index], name -> name, data -> [values]} + records : list like [value, ... , value] + index : dict like {index -> value} + double_precision : The number of decimal places to use when encoding + floating point values, default 10. + force_ascii : force encoded string to be ASCII, default True. + + Returns + ------- + result : JSON compatible string + """ + from pandas._ujson import dumps + return dumps(self, orient=orient, double_precision=double_precision, + ensure_ascii=force_ascii) + def to_sparse(self, kind='block', fill_value=None): """ Convert Series to SparseSeries diff --git a/pandas/src/ujson/lib/ultrajson.h b/pandas/src/ujson/lib/ultrajson.h new file mode 100644 index 0000000000000..0514236e750e1 --- /dev/null +++ b/pandas/src/ujson/lib/ultrajson.h @@ -0,0 +1,301 @@ +/* +Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by ESN Social Software AB (www.esn.me). +4. Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Portions of code from: +MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +*/ + +/* +Ultra fast JSON encoder and decoder +Developed by Jonas Tarnstrom (jonas@esn.me). + +Encoder notes: +------------------ + +:: Cyclic references :: +Cyclic referenced objects are not detected. +Set JSONObjectEncoder.recursionMax to suitable value or make sure input object +tree doesn't have cyclic references. 
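+
+For example (sketch only), before calling JSON_EncodeObject:
+
+  enc.recursionMax = 64;  // leaving it at 0 falls back to JSON_MAX_RECURSION_DEPTH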
+ +*/ + +#ifndef __ULTRAJSON_H__ +#define __ULTRAJSON_H__ + +#include +#include + +//#define JSON_DECODE_NUMERIC_AS_DOUBLE + +// Don't output any extra whitespaces when encoding +#define JSON_NO_EXTRA_WHITESPACE + +// Max decimals to encode double floating point numbers with +#ifndef JSON_DOUBLE_MAX_DECIMALS +#define JSON_DOUBLE_MAX_DECIMALS 15 +#endif + +// Max recursion depth, default for encoder +#ifndef JSON_MAX_RECURSION_DEPTH +#define JSON_MAX_RECURSION_DEPTH 1024 +#endif + +/* +Dictates and limits how much stack space for buffers UltraJSON will use before resorting to provided heap functions */ +#ifndef JSON_MAX_STACK_BUFFER_SIZE +#define JSON_MAX_STACK_BUFFER_SIZE 131072 +#endif + +#ifdef _WIN32 + +typedef __int64 JSINT64; +typedef unsigned __int64 JSUINT64; + +typedef unsigned __int32 uint32_t; +typedef __int32 JSINT32; +typedef uint32_t JSUINT32; +typedef unsigned __int8 JSUINT8; +typedef unsigned __int16 JSUTF16; +typedef unsigned __int32 JSUTF32; +typedef __int64 JSLONG; + +#define EXPORTFUNCTION __declspec(dllexport) + +#define FASTCALL_MSVC __fastcall +#define FASTCALL_ATTR +#define INLINE_PREFIX __inline + +#else + +#include +typedef int64_t JSINT64; +typedef u_int64_t JSUINT64; + +typedef int32_t JSINT32; +typedef u_int32_t JSUINT32; + +#define FASTCALL_MSVC +#define FASTCALL_ATTR __attribute__((fastcall)) +#define INLINE_PREFIX inline + +typedef u_int32_t uint32_t; + +typedef u_int8_t JSUINT8; +typedef u_int16_t JSUTF16; +typedef u_int32_t JSUTF32; + +typedef int64_t JSLONG; + +#define EXPORTFUNCTION +#endif + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define __LITTLE_ENDIAN__ +#else + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define __BIG_ENDIAN__ +#endif + +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#error "Endianess not supported" +#endif + +enum JSTYPES +{ + JT_NULL, // NULL + JT_TRUE, //boolean true + JT_FALSE, //boolean false + JT_INT, //(JSINT32 (signed 32-bit)) + JT_LONG, //(JSINT64 (signed 64-bit)) + JT_DOUBLE, //(double) + JT_UTF8, //(char 8-bit) + JT_ARRAY, // Array structure + JT_OBJECT, // Key/Value structure + JT_INVALID, // Internal, do not return nor expect +}; + +typedef void * JSOBJ; +typedef void * JSITER; + +typedef struct __JSONTypeContext +{ + int type; + void *encoder; + void *prv[32]; +} JSONTypeContext; + +/* +Function pointer declarations, suitable for implementing UltraJSON */ +typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc); +typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc); +typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc); +typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc); +typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc, size_t *outLen); +typedef void *(*JSPFN_MALLOC)(size_t size); +typedef void (*JSPFN_FREE)(void *pptr); +typedef void *(*JSPFN_REALLOC)(void *base, size_t size); + +typedef struct __JSONObjectEncoder +{ + void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); + JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); + double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + + /* + Begin iteration of an iteratable object (JS_ARRAY or JS_OBJECT) + Implementor should setup iteration state in ti->prv + */ + JSPFN_ITERBEGIN iterBegin; + + /* + Retrieve next object in an iteration. 
Should return 0 to indicate iteration has reached end or 1 if there are more items. + Implementor is responsible for keeping state of the iteration. Use ti->prv fields for this + */ + JSPFN_ITERNEXT iterNext; + + /* + Ends the iteration of an iteratable object. + Any iteration state stored in ti->prv can be freed here + */ + JSPFN_ITEREND iterEnd; + + /* + Returns a reference to the value object of an iterator + The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETVALUE iterGetValue; + + /* + Return name of iterator. + The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETNAME iterGetName; + + /* + Release a value as indicated by setting ti->release = 1 in the previous getValue call. + The ti->prv array should contain the necessary context to release the value + */ + void (*releaseObject)(JSOBJ obj); + + /* Library functions + Set to NULL to use STDLIB malloc,realloc,free */ + JSPFN_MALLOC malloc; + JSPFN_REALLOC realloc; + JSPFN_FREE free; + + /* + Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/ + int recursionMax; + + /* + Configuration for max decimals of double floating poiunt numbers to encode (0-9) */ + int doublePrecision; + + /* + If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or what ever charset strings are brought as */ + int forceASCII; + + + /* + Set to an error message if error occured */ + const char *errorMsg; + JSOBJ errorObj; + + /* Buffer stuff */ + char *start; + char *offset; + char *end; + int heap; + int level; + +} JSONObjectEncoder; + + +/* +Encode an object structure into JSON. + +Arguments: +obj - An anonymous type representing the object +enc - Function definitions for querying JSOBJ type +buffer - Preallocated buffer to store result in. If NULL function allocates own buffer +cbBuffer - Length of buffer (ignored if buffer is NULL) + +Returns: +Encoded JSON object as a null terminated char string. + +NOTE: +If the supplied buffer wasn't enough to hold the result the function will allocate a new buffer. +Life cycle of the provided buffer must still be handled by caller. + +If the return value doesn't equal the specified buffer caller must release the memory using +JSONObjectEncoder.free or free() as specified when calling this function. 
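+
+Illustrative call sequence for a root object obj (a sketch only; the callback
+members of the encoder struct must be filled in by the caller, as objToJSON.c
+does for Python objects):
+
+  char stack[65536];
+  JSONObjectEncoder enc = {0};  // NULL malloc/free/realloc mean stdlib defaults
+  // assign beginTypeContext, getStringValue, iterBegin, iterNext, ... here
+  char *out = JSON_EncodeObject(obj, &enc, stack, sizeof(stack));
+  if (enc.errorMsg != NULL)
+  {
+    // encoding failed; enc.errorObj refers to the offending object
+  }
+  if (out != stack)
+  {
+    enc.free(out);  // result did not fit the supplied buffer, release the heap copy
+  }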
+*/ +EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer); + + + +typedef struct __JSONObjectDecoder +{ + JSOBJ (*newString)(wchar_t *start, wchar_t *end); + int (*objectAddKey)(JSOBJ obj, JSOBJ name, JSOBJ value); + int (*arrayAddItem)(JSOBJ obj, JSOBJ value); + JSOBJ (*newTrue)(void); + JSOBJ (*newFalse)(void); + JSOBJ (*newNull)(void); + JSOBJ (*newObject)(void *decoder); + JSOBJ (*endObject)(JSOBJ obj); + JSOBJ (*newArray)(void *decoder); + JSOBJ (*endArray)(JSOBJ obj); + JSOBJ (*newInt)(JSINT32 value); + JSOBJ (*newLong)(JSINT64 value); + JSOBJ (*newDouble)(double value); + void (*releaseObject)(JSOBJ obj, void *decoder); + JSPFN_MALLOC malloc; + JSPFN_FREE free; + JSPFN_REALLOC realloc; + + char *errorStr; + char *errorOffset; + + + +} JSONObjectDecoder; + +EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); + +#endif diff --git a/pandas/src/ujson/lib/ultrajsondec.c b/pandas/src/ujson/lib/ultrajsondec.c new file mode 100644 index 0000000000000..591122be82f92 --- /dev/null +++ b/pandas/src/ujson/lib/ultrajsondec.c @@ -0,0 +1,837 @@ +/* +Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by ESN Social Software AB (www.esn.me). +4. Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Portions of code from: +MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. 
+ +*/ + +#include "ultrajson.h" +#include +#include +#include +#include +#include + +struct DecoderState +{ + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSONObjectDecoder *dec; +}; + +JSOBJ FASTCALL_MSVC decode_any( struct DecoderState *ds) FASTCALL_ATTR; +typedef JSOBJ (*PFN_DECODER)( struct DecoderState *ds); +#define RETURN_JSOBJ_NULLCHECK(_expr) return(_expr); + +double createDouble(double intNeg, double intValue, double frcValue, int frcDecimalCount) +{ + static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000}; + + return (intValue + (frcValue / g_pow10[frcDecimalCount])) * intNeg; +} + +static JSOBJ SetError( struct DecoderState *ds, int offset, const char *message) +{ + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *) message; + return NULL; +} + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric ( struct DecoderState *ds) +{ +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE + double intNeg = 1; + double intValue; +#else + int intNeg = 1; + JSLONG intValue; +#endif + + double expNeg; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expValue; + char *offset = ds->start; + + if (*(offset) == '-') + { + offset ++; + intNeg = -1; + } + + // Scan integer part + intValue = 0; + + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + //FIXME: Check for arithemtic overflow here + //PERF: Don't do 64-bit arithmetic here unless we know we have to +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE + intValue = intValue * 10.0 + (double) (chr - 48); +#else + intValue = intValue * 10LL + (JSLONG) (chr - 48); +#endif + offset ++; + break; + + case '.': + offset ++; + goto DECODE_FRACTION; + break; + + case 'e': + case 'E': + offset ++; + goto DECODE_EXPONENT; + break; + + default: + goto BREAK_INT_LOOP; + break; + } + } + +BREAK_INT_LOOP: + + ds->lastType = JT_INT; + ds->start = offset; + + //If input string is LONGLONG_MIN here the value is already negative so we should not flip it + +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE +#else + if (intValue < 0) + { + intNeg = 1; + } +#endif + + //dbg1 = (intValue * intNeg); + //dbg2 = (JSLONG) dbg1; + +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE + if (intValue > (double) INT_MAX || intValue < (double) INT_MIN) +#else + if ( (intValue >> 31)) +#endif + { + RETURN_JSOBJ_NULLCHECK(ds->dec->newLong( (JSINT64) (intValue * (JSINT64) intNeg))); + } + else + { + RETURN_JSOBJ_NULLCHECK(ds->dec->newInt( (JSINT32) (intValue * intNeg))); + } + + + +DECODE_FRACTION: + + // Scan fraction part + frcValue = 0.0; + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) + { + frcValue = frcValue * 10.0 + (double) (chr - 48); + decimalCount ++; + } + offset ++; + break; + + case 'e': + case 'E': + offset ++; + goto DECODE_EXPONENT; + break; + + default: + goto BREAK_FRC_LOOP; + } + } + +BREAK_FRC_LOOP: + + if (intValue < 0) + { + intNeg = 1; + } + + //FIXME: Check for arithemtic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue, 
frcValue, decimalCount))); + +DECODE_EXPONENT: + expNeg = 1.0; + + if (*(offset) == '-') + { + expNeg = -1.0; + offset ++; + } + else + if (*(offset) == '+') + { + expNeg = +1.0; + offset ++; + } + + expValue = 0.0; + + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + expValue = expValue * 10.0 + (double) (chr - 48); + offset ++; + break; + + default: + goto BREAK_EXP_LOOP; + + } + } + +BREAK_EXP_LOOP: + +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE +#else + if (intValue < 0) + { + intNeg = 1; + } +#endif + + //FIXME: Check for arithemtic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue , frcValue, decimalCount) * pow(10.0, expValue * expNeg))); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds) +{ + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'r') + goto SETERROR; + if (*(offset++) != 'u') + goto SETERROR; + if (*(offset++) != 'e') + goto SETERROR; + + ds->lastType = JT_TRUE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newTrue()); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_false ( struct DecoderState *ds) +{ + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'a') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + if (*(offset++) != 's') + goto SETERROR; + if (*(offset++) != 'e') + goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newFalse()); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); + +} + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_null ( struct DecoderState *ds) +{ + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'u') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newNull()); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); +} + +FASTCALL_ATTR void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) +{ + char *offset = ds->start; + + while (1) + { + switch (*offset) + { + case ' ': + case '\t': + case '\r': + case '\n': + offset ++; + break; + + default: + ds->start = offset; + return; + } + } +} + + +enum DECODESTRINGSTATE +{ + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, + +}; + +static const JSUINT8 g_decoderLookup[256] = +{ +/* 0x00 */ DS_ISNULL, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x20 */ 1, 1, DS_ISQUOTE, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, DS_ISESCAPE, 1, 1, 1, +/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, +/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, +}; + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds) +{ + JSUTF16 sur[2] = { 0 }; + int iSur = 0; + int index; + wchar_t *escOffset; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start ++; + + if ( (ds->end - ds->start) > escLen) + { + size_t newSize = (ds->end - ds->start); + + if (ds->escHeap) + { + ds->escStart = (wchar_t *) ds->dec->realloc (ds->escStart, newSize * sizeof(wchar_t)); + } + else + { + wchar_t *oldStart = ds->escStart; + ds->escHeap = 1; + ds->escStart = (wchar_t *) ds->dec->malloc (newSize * sizeof(wchar_t)); + memcpy (ds->escStart, oldStart, escLen * sizeof(wchar_t)); + } + + ds->escEnd = ds->escStart + newSize; + } + + escOffset = ds->escStart; + inputOffset = ds->start; + + while(1) + { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) + { + case DS_ISNULL: + return SetError(ds, -1, "Unmatched ''\"' when when decoding 'string'"); + + case DS_ISQUOTE: + ds->lastType = JT_UTF8; + inputOffset ++; + ds->start += ( (char *) inputOffset - (ds->start)); + RETURN_JSOBJ_NULLCHECK(ds->dec->newString(ds->escStart, escOffset)); + + case DS_UTFLENERROR: + return SetError (ds, -1, "Invalid UTF-8 sequence length when decoding 'string'"); + + case DS_ISESCAPE: + inputOffset ++; + switch (*inputOffset) + { + case '\\': *(escOffset++) = L'\\'; inputOffset++; continue; + case '\"': *(escOffset++) = L'\"'; inputOffset++; continue; + case '/': *(escOffset++) = L'/'; inputOffset++; continue; + case 'b': *(escOffset++) = L'\b'; inputOffset++; continue; + case 'f': *(escOffset++) = L'\f'; inputOffset++; continue; + case 'n': *(escOffset++) = L'\n'; inputOffset++; continue; + case 'r': *(escOffset++) = L'\r'; inputOffset++; continue; + case 't': *(escOffset++) = L'\t'; inputOffset++; continue; + + case 'u': + { + int index; + inputOffset ++; + + for (index = 0; index < 4; index ++) + { + switch (*inputOffset) + { + case '\0': return SetError (ds, -1, "Unterminated unicode escape sequence when decoding 'string'"); + default: return SetError (ds, -1, "Unexpected character in unicode escape sequence when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + (JSUTF16) (*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'A'); + break; + } + + inputOffset ++; + } + + + if (iSur == 0) + { + if((sur[iSur] & 0xfc00) == 0xd800) + { + // First of a surrogate pair, continue parsing + iSur ++; + break; + } + (*escOffset++) = (wchar_t) sur[iSur]; + iSur = 0; + } + else + { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) + { + return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'"); + } + +#if WCHAR_MAX == 0xffff + (*escOffset++) = (wchar_t) sur[0]; + (*escOffset++) = (wchar_t) sur[1]; +#else + (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); +#endif + iSur = 0; + } + break; + } + + case '\0': return 
SetError(ds, -1, "Unterminated escape sequence when decoding 'string'"); + default: return SetError(ds, -1, "Unrecognized escape sequence when decoding 'string'"); + } + break; + + case 1: + *(escOffset++) = (wchar_t) (*inputOffset++); + break; + + case 2: + { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) return SetError (ds, -1, "Overlong 2 byte UTF-8 sequence detected when decoding 'string'"); + *(escOffset++) = (wchar_t) ucs; + break; + } + + case 3: + { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index ++) + { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x800) return SetError (ds, -1, "Overlong 3 byte UTF-8 sequence detected when encoding string"); + *(escOffset++) = (wchar_t) ucs; + break; + } + + case 4: + { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; + + for (index = 0; index < 3; index ++) + { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x10000) return SetError (ds, -1, "Overlong 4 byte UTF-8 sequence detected when decoding 'string'"); + + #if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) + { + ucs -= 0x10000; + *(escOffset++) = (ucs >> 10) + 0xd800; + *(escOffset++) = (ucs & 0x3ff) + 0xdc00; + } + else + { + *(escOffset++) = (wchar_t) ucs; + } + #else + *(escOffset++) = (wchar_t) ucs; + #endif + break; + } + } + } +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array( struct DecoderState *ds) +{ + JSOBJ itemValue; + JSOBJ newObj = ds->dec->newArray(ds->dec); + + ds->lastType = JT_INVALID; + ds->start ++; + + while (1)//(*ds->start) != '\0') + { + SkipWhitespace(ds); + + if ((*ds->start) == ']') + { + ds->start++; + return ds->dec->endArray(newObj); + } + + itemValue = decode_any(ds); + + if (itemValue == NULL) + { + ds->dec->releaseObject(newObj, ds->dec); + return NULL; + } + + if (!ds->dec->arrayAddItem (newObj, itemValue)) + { + ds->dec->releaseObject(newObj, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) + { + case ']': + return ds->dec->endArray(newObj); + + case ',': + break; + + default: + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unexpected character in found when decoding array value"); + } + } + + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unmatched ']' when decoding 'array'"); +} + + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object( struct DecoderState *ds) +{ + JSOBJ itemName; + JSOBJ itemValue; + JSOBJ newObj = ds->dec->newObject(ds->dec); + + ds->start ++; + + while (1) + { + SkipWhitespace(ds); + + if ((*ds->start) == '}') + { + ds->start ++; + return ds->dec->endObject(newObj); + } + + ds->lastType = JT_INVALID; + itemName = decode_any(ds); + + if (itemName == NULL) + { + ds->dec->releaseObject(newObj, ds->dec); + return NULL; + } + + if (ds->lastType != JT_UTF8) + { + ds->dec->releaseObject(newObj, ds->dec); + ds->dec->releaseObject(itemName, ds->dec); + return SetError(ds, -1, "Key name of object must be 'string' when decoding 'object'"); + } + + SkipWhitespace(ds); + + if (*(ds->start++) != ':') + { + ds->dec->releaseObject(newObj, ds->dec); 
+ ds->dec->releaseObject(itemName, ds->dec); + return SetError(ds, -1, "No ':' found when decoding object value"); + } + + SkipWhitespace(ds); + + itemValue = decode_any(ds); + + if (itemValue == NULL) + { + ds->dec->releaseObject(newObj, ds->dec); + ds->dec->releaseObject(itemName, ds->dec); + return NULL; + } + + if (!ds->dec->objectAddKey (newObj, itemName, itemValue)) + { + ds->dec->releaseObject(newObj, ds->dec); + ds->dec->releaseObject(itemName, ds->dec); + ds->dec->releaseObject(itemValue, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) + { + case '}': + return ds->dec->endObject(newObj); + + case ',': + break; + + default: + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unexpected character in found when decoding object value"); + } + } + + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unmatched '}' when decoding object"); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) +{ + while (1) + { + switch (*ds->start) + { + case '\"': + return decode_string (ds); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + return decode_numeric (ds); + + case '[': return decode_array (ds); + case '{': return decode_object (ds); + case 't': return decode_true (ds); + case 'f': return decode_false (ds); + case 'n': return decode_null (ds); + + case ' ': + case '\t': + case '\r': + case '\n': + // White space + ds->start ++; + break; + + default: + return SetError(ds, -1, "Expected object or value"); + } + } +} + + +JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer) +{ + + /* + FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode escaping doesn't run into the wall each time */ + struct DecoderState ds; + wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; + JSOBJ ret; + + ds.start = (char *) buffer; + ds.end = ds.start + cbBuffer; + + ds.escStart = escBuffer; + ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); + ds.escHeap = 0; + ds.dec = dec; + ds.dec->errorStr = NULL; + ds.dec->errorOffset = NULL; + + ds.dec = dec; + + ret = decode_any (&ds); + + if (ds.escHeap) + { + dec->free(ds.escStart); + } + return ret; +} diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/src/ujson/lib/ultrajsonenc.c new file mode 100644 index 0000000000000..594bef253b2f6 --- /dev/null +++ b/pandas/src/ujson/lib/ultrajsonenc.c @@ -0,0 +1,858 @@ +/* +Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by ESN Social Software AB (www.esn.me). +4. Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Portions of code from: +MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +*/ + +#include "ultrajson.h" +#include +#include +#include +#include +#include + +#include + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000}; +static const char g_hexChars[] = "0123456789abcdef"; +static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; + + +/* +FIXME: While this is fine dandy and working it's a magic value mess which probably only the author understands. +Needs a cleanup and more documentation */ + +/* +Table for pure ascii output escaping all characters above 127 to \uXXXX */ +static const JSUINT8 g_asciiOutputTable[256] = +{ +/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30, +/* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, +/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, +/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1, +/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + + +static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message) +{ + enc->errorMsg = message; + enc->errorObj = obj; +} + +/* +FIXME: Keep track of how big these get across several encoder calls and try to make an estimate +That way we won't run our head into the wall each call */ +void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded) +{ + size_t curSize = enc->end - enc->start; + size_t newSize = curSize * 2; + size_t offset = enc->offset - enc->start; + + while (newSize < curSize + cbNeeded) + { + newSize *= 2; + } + + if (enc->heap) + { + enc->start = (char *) enc->realloc (enc->start, newSize); + } + else + { + char *oldStart = enc->start; + enc->heap = 1; + enc->start = (char *) enc->malloc (newSize); + memcpy (enc->start, oldStart, offset); + } + enc->offset 
= enc->start + offset; + enc->end = enc->start + newSize; +} + +FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value) +{ + *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; + *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; + *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; + *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; +} + +int Buffer_EscapeStringUnvalidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) +{ + char *of = (char *) enc->offset; + + while (1) + { + switch (*io) + { + case 0x00: + if (io < end) + { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + break; + } + else + { + enc->offset += (of - enc->offset); + return TRUE; + } + + case '\"': (*of++) = '\\'; (*of++) = '\"'; break; + case '\\': (*of++) = '\\'; (*of++) = '\\'; break; + case '/': (*of++) = '\\'; (*of++) = '/'; break; + case '\b': (*of++) = '\\'; (*of++) = 'b'; break; + case '\f': (*of++) = '\\'; (*of++) = 'f'; break; + case '\n': (*of++) = '\\'; (*of++) = 'n'; break; + case '\r': (*of++) = '\\'; (*of++) = 'r'; break; + case '\t': (*of++) = '\\'; (*of++) = 't'; break; + + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; + break; + + default: (*of++) = (*io); break; + } + + io++; + } + + return FALSE; +} + + +/* +FIXME: +This code only works with Little and Big Endian + +FIXME: The JSON spec says escape "/" but non of the others do and we don't +want to be left alone doing it so we don't :) + +*/ +int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) +{ + JSUTF32 ucs; + char *of = (char *) enc->offset; + + while (1) + { + + //JSUINT8 chr = (unsigned char) *io; + JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io]; + + switch (utflen) + { + case 0: + { + if (io < end) + { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + io ++; + continue; + } + else + { + enc->offset += (of - enc->offset); + return TRUE; + } + } + + case 1: + { + *(of++)= (*io++); + continue; + } + + case 2: + { + JSUTF32 in; + + if (io + 1 > end) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + in = *((JSUTF16 *) io); + +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); +#else + ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x80) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 2 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 2; + break; + } + + case 3: + { + JSUTF32 in; + + if (io + 2 > end) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + +#ifdef __LITTLE_ENDIAN__ + in = *((JSUTF16 *) io); + in |= *((JSUINT8 *) io + 2) << 16; + ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | ((in & 0x3f0000) >> 16); +#else + in = *((JSUTF16 *) 
io) << 8; + in |= *((JSUINT8 *) io + 2); + ucs = ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + + + if (ucs < 0x800) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 3 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 3; + break; + } + case 4: + { + JSUTF32 in; + + if (io + 3 > end) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + +#ifdef __LITTLE_ENDIAN__ + in = *((JSUTF32 *) io); + ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); +#else + in = *((JSUTF32 *) io); + ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + if (ucs < 0x10000) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 4 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 4; + break; + } + + + case 5: + case 6: + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unsupported UTF-8 sequence length when encoding string"); + return FALSE; + + case 30: + // \uXXXX encode + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; + io ++; + continue; + + case 10: + case 12: + case 14: + case 16: + case 18: + case 20: + case 22: + case 24: + *(of++) = *( (char *) (g_escapeChars + utflen + 0)); + *(of++) = *( (char *) (g_escapeChars + utflen + 1)); + io ++; + continue; + } + + /* + If the character is a UTF8 sequence of length > 1 we end up here */ + if (ucs >= 0x10000) + { + ucs -= 0x10000; + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (ucs >> 10) + 0xd800); + of += 4; + + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (ucs & 0x3ff) + 0xdc00); + of += 4; + } + else + { + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, ucs); + of += 4; + } + } + + return FALSE; +} + +#define Buffer_Reserve(__enc, __len) \ + if ((__enc)->offset + (__len) > (__enc)->end) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + + +#define Buffer_AppendCharUnchecked(__enc, __chr) \ + *((__enc)->offset++) = __chr; \ + +FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end) +{ + char aux; + while (end > begin) + aux = *end, *end-- = *begin, *begin++ = aux; +} + +void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) +{ + char* wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. + + do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset,wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) +{ + char* wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. 
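+    // Digits are emitted least-significant first and the sign, if any, last;
+    // strreverse() below then flips the buffer into conventional order.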
+ + do *wstr++ = (char)(48 + (uvalue % 10ULL)); while(uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset,wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value) +{ + /* if input is larger than thres_max, revert to exponential */ + const double thres_max = (double) 1e16 - 1; + int count; + double diff = 0.0; + char* str = enc->offset; + char* wstr = str; + unsigned long whole; + double tmp; + unsigned long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) + { + SetError (obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } + if (! (value == value)) + { + SetError (obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) + { + neg = 1; + value = -value; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long) value; + tmp = (value - whole) * pow10; + frac = (unsigned long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) + { + ++frac; + /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ + if (frac >= pow10) + { + frac = 0; + ++whole; + } + } + else + if (diff == 0.5 && ((frac == 0) || (frac & 1))) + { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value > thres_max) + { + enc->offset += sprintf(str, "%.15e", neg ? -value : value); + return TRUE; + } + + if (enc->doublePrecision == 0) + { + diff = value - whole; + + if (diff > 0.5) + { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } + else + if (diff == 0.5 && (whole & 1)) + { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + //vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } + else + if (frac) + { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) + { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do + { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) + { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; + } + else + { + *wstr++ = '0'; + *wstr++ = '.'; + } + + // do whole part + // Take care of sign + // Conversion. Number is reversed. 
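+    // The fractional digits and the '.' are already in the buffer (emitted in
+    // reverse); the whole part and sign are appended now and strreverse()
+    // below flips everything into conventional order.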
+ do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10); + + if (neg) + { + *wstr++ = '-'; + } + strreverse(str, wstr-1); + enc->offset += (wstr - (enc->offset)); + + return TRUE; +} + + + + + + +/* +FIXME: +Handle integration functions returning NULL here */ + +/* +FIXME: +Perhaps implement recursion detection */ + +void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName) +{ + JSONTypeContext tc; + tc.encoder = enc; + size_t szlen; + + if (enc->level > enc->recursionMax) + { + SetError (obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + + Since input is assumed to be UTF-8 the worst character length is: + + 4 bytes (of UTF-8) => "\uXXXX\uXXXX" (12 bytes) + */ + + Buffer_Reserve(enc, 256 + (((cbName / 4) + 1) * 12)); + + if (name) + { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) + { + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) + { + return; + } + } + else + { + if (!Buffer_EscapeStringUnvalidated(obj, enc, name, name + cbName)) + { + return; + } + } + + + Buffer_AppendCharUnchecked(enc, '\"'); + + Buffer_AppendCharUnchecked (enc, ':'); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked (enc, ' '); +#endif + } + + enc->beginTypeContext(obj, &tc); + + switch (tc.type) + { + case JT_INVALID: + return; + + case JT_ARRAY: + { + int count = 0; + JSOBJ iterObj; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked (enc, '['); + + while (enc->iterNext(obj, &tc)) + { + if (count > 0) + { + Buffer_AppendCharUnchecked (enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked (buffer, ' '); +#endif + } + + iterObj = enc->iterGetValue(obj, &tc); + + enc->level ++; + encode (iterObj, enc, NULL, 0); + count ++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendCharUnchecked (enc, ']'); + break; + } + + case JT_OBJECT: + { + int count = 0; + JSOBJ iterObj; + char *objName; + + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked (enc, '{'); + + while (enc->iterNext(obj, &tc)) + { + if (count > 0) + { + Buffer_AppendCharUnchecked (enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked (enc, ' '); +#endif + } + + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); + + enc->level ++; + encode (iterObj, enc, objName, szlen); + count ++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendCharUnchecked (enc, '}'); + break; + } + + case JT_LONG: + { + Buffer_AppendLongUnchecked (enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: + { + Buffer_AppendIntUnchecked (enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: + { + Buffer_AppendCharUnchecked (enc, 't'); + Buffer_AppendCharUnchecked (enc, 'r'); + Buffer_AppendCharUnchecked (enc, 'u'); + Buffer_AppendCharUnchecked (enc, 'e'); + break; + } + + case JT_FALSE: + { + Buffer_AppendCharUnchecked (enc, 'f'); + Buffer_AppendCharUnchecked (enc, 'a'); + Buffer_AppendCharUnchecked (enc, 'l'); + Buffer_AppendCharUnchecked (enc, 's'); + Buffer_AppendCharUnchecked (enc, 'e'); + break; + } + + + case JT_NULL: + { + Buffer_AppendCharUnchecked (enc, 'n'); + Buffer_AppendCharUnchecked (enc, 'u'); + Buffer_AppendCharUnchecked (enc, 'l'); + Buffer_AppendCharUnchecked (enc, 'l'); + break; + } + + case JT_DOUBLE: + { + if (!Buffer_AppendDoubleUnchecked (obj, enc, enc->getDoubleValue(obj, &tc))) + { + enc->endTypeContext(obj, &tc); + enc->level --; 
+ return; + } + break; + } + + case JT_UTF8: + { + const char *value = enc->getStringValue(obj, &tc, &szlen); + Buffer_Reserve(enc, ((szlen / 4) + 1) * 12); + Buffer_AppendCharUnchecked (enc, '\"'); + + + if (enc->forceASCII) + { + if (!Buffer_EscapeStringValidated(obj, enc, value, value + szlen)) + { + enc->endTypeContext(obj, &tc); + enc->level --; + return; + } + } + else + { + if (!Buffer_EscapeStringUnvalidated(obj, enc, value, value + szlen)) + { + enc->endTypeContext(obj, &tc); + enc->level --; + return; + } + } + + Buffer_AppendCharUnchecked (enc, '\"'); + break; + } + } + + enc->endTypeContext(obj, &tc); + enc->level --; + +} + +char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer) +{ + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) + { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) + { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) + { + _cbBuffer = 32768; + enc->start = (char *) enc->malloc (_cbBuffer); + enc->heap = 1; + } + else + { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + + encode (obj, enc, NULL, 0); + + Buffer_Reserve(enc, 1); + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; +} diff --git a/pandas/src/ujson/python/JSONtoObj.c b/pandas/src/ujson/python/JSONtoObj.c new file mode 100644 index 0000000000000..faec33f390cc6 --- /dev/null +++ b/pandas/src/ujson/python/JSONtoObj.c @@ -0,0 +1,650 @@ +#include +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#define NO_IMPORT_ARRAY +#include +#include + + +typedef struct __PyObjectDecoder +{ + JSONObjectDecoder dec; + + void* npyarr; // Numpy context buffer + npy_intp curdim; // Current array dimension + + PyArray_Descr* dtype; +} PyObjectDecoder; + +typedef struct __NpyArrContext +{ + PyObject* ret; + PyObject* labels[2]; + PyArray_Dims shape; + + PyObjectDecoder* dec; + + npy_intp i; + npy_intp elsize; + npy_intp elcount; +} NpyArrContext; + +//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) +#define PRINTMARK() + +// Numpy handling based on numpy internal code, specifically the function +// PyArray_FromIter. + +// numpy related functions are inter-dependent so declare them all here, +// to ensure the compiler catches any errors + +// standard numpy array handling +JSOBJ Object_npyNewArray(void* decoder); +JSOBJ Object_npyEndArray(JSOBJ obj); +int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value); + +// for more complex dtypes (object and string) fill a standard Python list +// and convert to a numpy array when done. 
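+// (growing a raw numpy buffer in place would otherwise mean juggling reference
+// counts for object/variable-length elements; see Object_npyArrayAddItem)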
+JSOBJ Object_npyNewArrayList(void* decoder); +JSOBJ Object_npyEndArrayList(JSOBJ obj); +int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value); + +// labelled support, encode keys and values of JS object into separate numpy +// arrays +JSOBJ Object_npyNewObject(void* decoder); +JSOBJ Object_npyEndObject(JSOBJ obj); +int Object_npyObjectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value); + + +// free the numpy context buffer +void Npy_releaseContext(NpyArrContext* npyarr) +{ + PRINTMARK(); + if (npyarr) + { + if (npyarr->shape.ptr) + { + PyObject_Free(npyarr->shape.ptr); + } + if (npyarr->dec) + { + // Don't set to null, used to make sure we don't Py_DECREF npyarr + // in releaseObject + // npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } +} + +JSOBJ Object_npyNewArray(void* _decoder) +{ + PRINTMARK(); + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + NpyArrContext* npyarr; + if (decoder->curdim <= 0) + { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + + if (!npyarr) + { + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp)*NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } + else + { + // starting a new dimension continue the current array (and reshape after) + npyarr = (NpyArrContext*) decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) + { + npyarr->shape.len++; + } + } + + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; +} + +JSOBJ Object_npyEndArray(JSOBJ obj) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return NULL; + } + + PyObject* ret = npyarr->ret; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i = npyarr->i; + char* new_data; + + npyarr->dec->curdim--; + + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. 
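+        // use the requested dtype's type number if one was supplied,
+        // otherwise keep the NPY_DEFAULT_TYPE fallback set above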
+ if (npyarr->dec->dtype) + { + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } + else if (npyarr->dec->curdim <= 0) + { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((char*)PyArray_DATA(ret)) = new_data; + } + + if (npyarr->dec->curdim <= 0) + { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) + { + npyarr->ret = PyArray_Newshape((PyArrayObject*) ret, &npyarr->shape, NPY_ANYORDER); + Py_DECREF(ret); + ret = npyarr->ret; + } + + if (npyarr->labels[0] || npyarr->labels[1]) + { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len+1); + for (i = 0; i < npyarr->shape.len; i++) + { + if (npyarr->labels[i]) + { + PyTuple_SET_ITEM(ret, i+1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } + else + { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i+1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } + + return ret; +} + +int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return 0; + } + + PyObject* type; + PyArray_Descr* dtype; + npy_intp i = npyarr->i; + char *new_data, *item; + + npyarr->shape.ptr[npyarr->dec->curdim-1]++; + + if (PyArray_Check(value)) + { + // multidimensional array, keep decoding values. + return 1; + } + + if (!npyarr->ret) + { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) + { + type = PyObject_Type(value); + if(!PyArray_DescrConverter(type, &dtype)) + { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } + else + { + dtype = PyArray_DescrNew(npyarr->dec->dtype); + } + + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) + { + Py_XDECREF(dtype); + + if (npyarr->dec->curdim > 1) + { + PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); + goto fail; + } + npyarr->ret = PyList_New(0); + if (!npyarr->ret) + { + goto fail; + } + ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArrayList; + ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayListAddItem; + ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArrayList; + return Object_npyArrayListAddItem(obj, value); + } + + npyarr->ret = PyArray_NewFromDescr(&PyArray_Type, dtype, 1, + &npyarr->elcount, NULL,NULL, 0, NULL); + + if (!npyarr->ret) + { + goto fail; + } + } + + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) + { + PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); + goto fail; + } + + npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP/npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), npyarr->elcount * npyarr->elsize); + } + else { + PyErr_NoMemory(); + goto fail; + } + ((char*)PyArray_DATA(npyarr->ret)) = new_data; + } + + PyArray_DIMS(npyarr->ret)[0] = i + 1; + + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL + || PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF( (PyObject *) value); + npyarr->i++; + return 1; + +fail: + + Npy_releaseContext(npyarr); + return 0; +} + +JSOBJ Object_npyNewArrayList(void* _decoder) +{ + PRINTMARK(); + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + PyErr_SetString(PyExc_ValueError, "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; +} + +JSOBJ Object_npyEndArrayList(JSOBJ obj) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return NULL; + } + + // convert decoded list to numpy array + PyObject* list = (PyObject *) npyarr->ret; + PyObject* ret = PyArray_FROM_O(list); + + ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; +} + +int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return 0; + } + PyList_Append((PyObject*) npyarr->ret, value); + Py_DECREF( (PyObject *) value); + return 1; +} + + +JSOBJ Object_npyNewObject(void* _decoder) +{ + PRINTMARK(); + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + if (decoder->curdim > 1) + { + PyErr_SetString(PyExc_ValueError, "labels only supported up to 2 dimensions"); + return NULL; + } + + return ((JSONObjectDecoder*)decoder)->newArray(decoder); +} + +JSOBJ Object_npyEndObject(JSOBJ obj) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return NULL; + } + + npy_intp labelidx = npyarr->dec->curdim-1; + + PyObject* list = npyarr->labels[labelidx]; + if (list) + { + npyarr->labels[labelidx] = PyArray_FROM_O(list); + Py_DECREF(list); + } + + return (PyObject*) ((JSONObjectDecoder*)npyarr->dec)->endArray(obj); +} + +int Object_npyObjectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value) +{ + PRINTMARK(); + // add key to label array, value to values array + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return 0; + } + + PyObject* label = (PyObject*) name; + npy_intp labelidx = npyarr->dec->curdim-1; + + if (!npyarr->labels[labelidx]) + { + npyarr->labels[labelidx] = PyList_New(0); + } + + // only fill label array once, assumes all column labels are the same + // for 2-dimensional arrays. 
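+ // keys are appended only while the label list is no longer than the current element count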
+ if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) + { + PyList_Append(npyarr->labels[labelidx], label); + } + + if(((JSONObjectDecoder*)npyarr->dec)->arrayAddItem(obj, value)) + { + Py_DECREF(label); + return 1; + } + return 0; +} + +int Object_objectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value) +{ + PyDict_SetItem (obj, name, value); + Py_DECREF( (PyObject *) name); + Py_DECREF( (PyObject *) value); + return 1; +} + +int Object_arrayAddItem(JSOBJ obj, JSOBJ value) +{ + PyList_Append(obj, value); + Py_DECREF( (PyObject *) value); + return 1; +} + +JSOBJ Object_newString(wchar_t *start, wchar_t *end) +{ + return PyUnicode_FromWideChar (start, (end - start)); +} + +JSOBJ Object_newTrue(void) +{ + Py_RETURN_TRUE; +} + +JSOBJ Object_newFalse(void) +{ + Py_RETURN_FALSE; +} + +JSOBJ Object_newNull(void) +{ + Py_RETURN_NONE; +} + +JSOBJ Object_newObject(void* decoder) +{ + return PyDict_New(); +} + +JSOBJ Object_endObject(JSOBJ obj) +{ + return obj; +} + +JSOBJ Object_newArray(void* decoder) +{ + return PyList_New(0); +} + +JSOBJ Object_endArray(JSOBJ obj) +{ + return obj; +} + +JSOBJ Object_newInteger(JSINT32 value) +{ + return PyInt_FromLong( (long) value); +} + +JSOBJ Object_newLong(JSINT64 value) +{ + return PyLong_FromLongLong (value); +} + +JSOBJ Object_newDouble(double value) +{ + return PyFloat_FromDouble(value); +} + +static void Object_releaseObject(JSOBJ obj, void* _decoder) +{ + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + if (obj != decoder->npyarr) + { + Py_XDECREF( ((PyObject *)obj)); + } +} + + +PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs) +{ + PRINTMARK(); + static char *kwlist[] = { "obj", "numpy", "labelled", "dtype", NULL}; + + PyObject *ret; + PyObject *sarg; + PyArray_Descr *dtype = NULL; + int numpy = 0, labelled = 0, decref = 0; + + PyObjectDecoder pyDecoder = + { + { + Object_newString, + Object_objectAddKey, + Object_arrayAddItem, + Object_newTrue, + Object_newFalse, + Object_newNull, + Object_newObject, + Object_endObject, + Object_newArray, + Object_endArray, + Object_newInteger, + Object_newLong, + Object_newDouble, + Object_releaseObject, + PyObject_Malloc, + PyObject_Free, + PyObject_Realloc, + } + }; + + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + + JSONObjectDecoder* decoder = (JSONObjectDecoder*) &pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|iiO&", kwlist, &sarg, &numpy, &labelled, PyArray_DescrConverter, &dtype)) + { + return NULL; + } + + if (PyUnicode_Check(sarg)) + { + sarg = PyUnicode_AsUTF8String(sarg); + if (sarg == NULL) + { + //Exception raised above us by codec according to docs + return NULL; + } + decref = 1; + } + else + if (!PyString_Check(sarg)) + { + PyErr_Format(PyExc_TypeError, "Expected String or Unicode"); + return NULL; + } + + if (numpy) + { + pyDecoder.dtype = dtype; + decoder->newArray = Object_npyNewArray; + decoder->endArray = Object_npyEndArray; + decoder->arrayAddItem = Object_npyArrayAddItem; + + if (labelled) + { + decoder->newObject = Object_npyNewObject; + decoder->endObject = Object_npyEndObject; + decoder->objectAddKey = Object_npyObjectAddKey; + } + } + + decoder->errorStr = NULL; + decoder->errorOffset = NULL; + + PRINTMARK(); + ret = JSON_DecodeObject(decoder, PyString_AS_STRING(sarg), PyString_GET_SIZE(sarg)); + PRINTMARK(); + + if (decref) + { + Py_DECREF(sarg); + } + + if (PyErr_Occurred()) + { + return NULL; + } + + if (decoder->errorStr) + { + /*FIXME: It's possible to give a much nicer error message here with actual failing element in input 
etc*/ + PyErr_Format (PyExc_ValueError, "%s", decoder->errorStr); + Py_XDECREF( (PyObject *) ret); + Npy_releaseContext(pyDecoder.npyarr); + + return NULL; + } + + return ret; +} + +PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs) +{ + PyObject *file; + PyObject *read; + PyObject *string; + PyObject *result; + PyObject *argtuple; + + if (!PyArg_ParseTuple (args, "O", &file)) { + return NULL; + } + + if (!PyObject_HasAttrString (file, "read")) + { + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + read = PyObject_GetAttrString (file, "read"); + + if (!PyCallable_Check (read)) { + Py_XDECREF(read); + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + string = PyObject_CallObject (read, NULL); + Py_XDECREF(read); + + if (string == NULL) + { + return NULL; + } + + argtuple = PyTuple_Pack(1, string); + + result = JSONToObj (self, argtuple, kwargs); + Py_XDECREF(string); + Py_DECREF(argtuple); + + if (result == NULL) { + return NULL; + } + + return result; +} + diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c new file mode 100644 index 0000000000000..3c6a2a929644c --- /dev/null +++ b/pandas/src/ujson/python/objToJSON.c @@ -0,0 +1,1554 @@ +#include +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include +#include +#include +#include +#include + +#define EPOCH_ORD 719163 + +static PyObject* cls_dataframe; +static PyObject* cls_series; +static PyObject* cls_index; + +typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, void *outValue, size_t *_outLen); + + +#if (PY_VERSION_HEX < 0x02050000) +typedef ssize_t Py_ssize_t; +#endif + +typedef struct __NpyArrContext +{ + PyObject *array; + char* dataptr; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + PyArray_GetItemFunc* getitem; + + char** rowLabels; + char** columnLabels; +} NpyArrContext; + +typedef struct __TypeContext +{ + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToJSON PyTypeToJSON; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + char *citemName; + + JSINT64 longValue; + + NpyArrContext *npyarr; + int transpose; + char** rowLabels; + char** columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; + +} TypeContext; + +typedef struct __PyObjectEncoder +{ + JSONObjectEncoder enc; + + // pass through the NpyArrContext when encoding multi-dimensional arrays + NpyArrContext* npyCtxtPassthru; + + // output format style for pandas data types + int outputFormat; +} PyObjectEncoder; + +#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) + +struct PyDictIterState +{ + PyObject *keys; + size_t i; + size_t sz; +}; + +enum PANDAS_FORMAT +{ + SPLIT, + RECORDS, + INDEX, + COLUMNS, + VALUES +}; + +//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) +#define PRINTMARK() + +void initObjToJSON(void) +{ + PyDateTime_IMPORT; + + PyObject *mod_frame = PyImport_ImportModule("pandas.core.frame"); + cls_dataframe = PyObject_GetAttrString(mod_frame, "DataFrame"); + cls_index = PyObject_GetAttrString(mod_frame, "Index"); + cls_series = PyObject_GetAttrString(mod_frame, "Series"); + Py_DECREF(mod_frame); + + /* 
Initialise numpy API */ + import_array(); +} + +static void *PyIntToINT32(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *((JSINT32 *) outValue) = PyInt_AS_LONG (obj); + return NULL; +} + +static void *PyIntToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *((JSINT64 *) outValue) = PyInt_AS_LONG (obj); + return NULL; +} + +static void *PyLongToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + *((JSINT64 *) outValue) = GET_TC(tc)->longValue; + return NULL; +} + +static void *NpyHalfToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + unsigned long ctype; + PyArray_ScalarAsCtype(obj, &ctype); + *((double *) outValue) = npy_half_to_double (ctype); + return NULL; +} + +static void *NpyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyArray_CastScalarToCtype(obj, outValue, PyArray_DescrFromType(NPY_DOUBLE)); + return NULL; +} + +static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *((double *) outValue) = PyFloat_AS_DOUBLE (obj); + return NULL; +} + +static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *_outLen = PyString_GET_SIZE(obj); + return PyString_AS_STRING(obj); +} + +static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyObject *newObj = PyUnicode_EncodeUTF8 (PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), NULL); + + GET_TC(tc)->newObj = newObj; + + *_outLen = PyString_GET_SIZE(newObj); + return PyString_AS_STRING(newObj); +} + +static void *PyDateTimeToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + int y, m, d, h, mn, s, days; + + y = PyDateTime_GET_YEAR(obj); + m = PyDateTime_GET_MONTH(obj); + d = PyDateTime_GET_DAY(obj); + h = PyDateTime_DATE_GET_HOUR(obj); + mn = PyDateTime_DATE_GET_MINUTE(obj); + s = PyDateTime_DATE_GET_SECOND(obj); + + days = PyInt_AS_LONG(PyObject_CallMethod(PyDate_FromDate(y, m, 1), "toordinal", NULL)) - EPOCH_ORD + d - 1; + *( (JSINT64 *) outValue) = (((JSINT64) ((days * 24 + h) * 60 + mn)) * 60 + s); + return NULL; +} + +static void *PyDateToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + int y, m, d, days; + + y = PyDateTime_GET_YEAR(obj); + m = PyDateTime_GET_MONTH(obj); + d = PyDateTime_GET_DAY(obj); + + days = PyInt_AS_LONG(PyObject_CallMethod(PyDate_FromDate(y, m, 1), "toordinal", NULL)) - EPOCH_ORD + d - 1; + *( (JSINT64 *) outValue) = ((JSINT64) days * 86400); + + return NULL; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= +int NpyArr_iterNextNone(JSOBJ _obj, JSONTypeContext *tc) +{ + return 0; +} + +void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) +{ + PyArrayObject *obj; + + if (GET_TC(tc)->newObj) + { + obj = (PyArrayObject *) GET_TC(tc)->newObj; + } + else + { + obj = (PyArrayObject *) _obj; + } + + if (PyArray_SIZE(obj) > 0) + { + PRINTMARK(); + NpyArrContext *npyarr = PyMem_Malloc(sizeof(NpyArrContext)); + 
GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) + { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject*) obj; + npyarr->getitem = (PyArray_GetItemFunc*) PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + + if (GET_TC(tc)->transpose) + { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } + else + { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; + } + else + { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + } + PRINTMARK(); +} + +void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->npyarr) + { + PyMem_Free(GET_TC(tc)->npyarr); + } + Py_XDECREF(GET_TC(tc)->newObj); + PRINTMARK(); +} + +void NpyArrPassThru_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); +} + +void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); + // finished this dimension, reset the data pointer + NpyArrContext* npyarr = GET_TC(tc)->npyarr; + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; +} + +int NpyArr_iterNextItem(JSOBJ _obj, JSONTypeContext *tc) +{ + PRINTMARK(); + NpyArrContext* npyarr = GET_TC(tc)->npyarr; + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) + { + return 0; + } + + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) +{ + PRINTMARK(); + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (npyarr->curdim >= npyarr->ndim || npyarr->index[npyarr->stridedim] >= npyarr->dim) + { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + ((PyObjectEncoder*) tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); + return GET_TC(tc)->itemValue; +} + +char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + PRINTMARK(); + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + npy_intp idx; + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) + { + idx = npyarr->index[npyarr->stridedim] - 1; + *outLen = strlen(npyarr->columnLabels[idx]); + return npyarr->columnLabels[idx]; + } + else + { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + *outLen = strlen(npyarr->rowLabels[idx]); + return npyarr->rowLabels[idx]; + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is 
borrowed reference, no ref counting +//============================================================================= +void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE( (PyObject *) obj); + GET_TC(tc)->itemValue = NULL; +} + +int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + PyObject *item; + + if (GET_TC(tc)->index >= GET_TC(tc)->size) + { + return 0; + } + + item = PyTuple_GET_ITEM (obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index ++; + return 1; +} + +void Tuple_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ +} + +JSOBJ Tuple_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Tuple_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return NULL; +} + +//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); + PRINTMARK(); +} + +void Dir_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + Py_DECREF( (PyObject *) GET_TC(tc)->attrList); + PRINTMARK(); +} + +int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) +{ + PyObject *obj = (PyObject *) _obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = NULL; + + + if (itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index ++) + { + PyObject* attr = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + char* attrStr = PyString_AS_STRING(attr); + + if (attrStr[0] == '_') + { + PRINTMARK(); + continue; + } + + itemValue = PyObject_GetAttr(obj, attr); + if (itemValue == NULL) + { + PyErr_Clear(); + PRINTMARK(); + continue; + } + + if (PyCallable_Check(itemValue)) + { + Py_DECREF(itemValue); + PRINTMARK(); + continue; + } + + PRINTMARK(); + itemName = attr; + break; + } + + if (itemName == NULL) + { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index ++; + + PRINTMARK(); + return 1; +} + + + +JSOBJ Dir_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); + return GET_TC(tc)->itemValue; +} + +char *Dir_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + PRINTMARK(); + *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); + return PyString_AS_STRING(GET_TC(tc)->itemName); +} + + + + +//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). 
No refcounting +//============================================================================= +void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE( (PyObject *) obj); +} + +int List_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->index >= GET_TC(tc)->size) + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM (obj, GET_TC(tc)->index); + GET_TC(tc)->index ++; + return 1; +} + +void List_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ +} + +JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *List_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +void Index_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyMem_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (!GET_TC(tc)->citemName) + { + return 0; + } + + Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "name", 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } + else + if (index == 1) + { + memcpy(GET_TC(tc)->citemName, "data", 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void Index_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->citemName) + { + PyMem_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); +} + +JSOBJ Index_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Index_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +void Series_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyMem_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (!GET_TC(tc)->citemName) + { + return 0; + } + + Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "name", 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } + else + if (index == 1) + { + memcpy(GET_TC(tc)->citemName, "index", 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } + else + if (index == 2) + { + memcpy(GET_TC(tc)->citemName, "data", 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void Series_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->citemName) + { + PyMem_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); +} + +JSOBJ Series_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Series_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t 
*outLen) +{ + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; +} + +//============================================================================= +// pandas DataFrame iteration functions +//============================================================================= +void DataFrame_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyMem_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (!GET_TC(tc)->citemName) + { + return 0; + } + + Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "columns", 8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } + else + if (index == 1) + { + memcpy(GET_TC(tc)->citemName, "index", 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } + else + if (index == 2) + { + memcpy(GET_TC(tc)->citemName, "data", 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void DataFrame_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->citemName) + { + PyMem_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); +} + +JSOBJ DataFrame_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *DataFrame_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; +} + +//============================================================================= +// Dict iteration functions +// itemName might converted to string (Python_Str). Do refCounting +// itemValue is borrowed from object (which is dict). 
No refCounting +//============================================================================= +void Dict_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + PRINTMARK(); +} + +int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + + if (!PyDict_Next ( (PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) + { + PRINTMARK(); + return 0; + } + + if (PyUnicode_Check(GET_TC(tc)->itemName)) + { + GET_TC(tc)->itemName = PyUnicode_EncodeUTF8 ( + PyUnicode_AS_UNICODE(GET_TC(tc)->itemName), + PyUnicode_GET_SIZE(GET_TC(tc)->itemName), + NULL + ); + } + else + if (!PyString_Check(GET_TC(tc)->itemName)) + { + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + } + else + { + Py_INCREF(GET_TC(tc)->itemName); + } + PRINTMARK(); + return 1; +} + +void Dict_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); + PRINTMARK(); +} + +JSOBJ Dict_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); + return PyString_AS_STRING(GET_TC(tc)->itemName); +} + +void NpyArr_freeLabels(char** labels, npy_intp len) +{ + npy_intp i; + + if (labels) + { + for (i = 0; i < len; i++) + { + PyMem_Free(labels[i]); + } + PyMem_Free(labels); + } +} + +char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_intp num) +{ + PRINTMARK(); + npy_intp i, stride, bufsize, len; + char** ret; + char *dataptr, *cLabel; + PyArray_GetItemFunc* getitem; + + if (PyArray_SIZE(labels) < num) + { + PyErr_SetString(PyExc_ValueError, "Label array sizes do not match corresponding data shape"); + return 0; + } + + ret = PyMem_Malloc(sizeof(char*)*num); + if (!ret) + { + PyErr_NoMemory(); + return 0; + } + + bufsize = enc->end - enc->start; + stride = PyArray_STRIDE(labels, 0); + dataptr = PyArray_DATA(labels); + getitem = PyArray_DESCR(labels)->f->getitem; + + for (i = 0; i < num; i++) + { + cLabel = JSON_EncodeObject(getitem(dataptr, labels), enc, enc->start, bufsize); + + // trim off any quotes surrounding the result + if (*cLabel == '\"') + { + cLabel++; + enc->offset -= 2; + *(enc->offset) = '\0'; + } + + len = enc->offset - cLabel + 1; + ret[i] = PyMem_Malloc(sizeof(char)*len); + + if (!ret[i]) + { + PyErr_NoMemory(); + return 0; + } + + memcpy(ret[i], cLabel, len); + dataptr += stride; + } + + enc->offset = enc->start; + return ret; +} + +void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc) +{ + PRINTMARK(); + if (!_obj) { + tc->type = JT_INVALID; + return; + } + + PyObject* obj = (PyObject*) _obj; + TypeContext *pc = (TypeContext *) tc->prv; + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + PyObject *toDictFunc; + + int i; + for (i = 0; i < 32; i++) + { + tc->prv[i] = 0; + } + + if (PyIter_Check(obj) || PyArray_Check(obj)) + { + goto ISITERABLE; + } + + if (PyBool_Check(obj)) + { + PRINTMARK(); + tc->type = (obj == Py_True) ? 
JT_TRUE : JT_FALSE; + return; + } + else + if (PyInt_Check(obj)) + { + PRINTMARK(); +#ifdef _LP64 + pc->PyTypeToJSON = PyIntToINT64; tc->type = JT_LONG; +#else + pc->PyTypeToJSON = PyIntToINT32; tc->type = JT_INT; +#endif + return; + } + else + if (PyLong_Check(obj)) + { + PyObject *exc; + + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + GET_TC(tc)->longValue = PyLong_AsLongLong(obj); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) + { + PRINTMARK(); + tc->type = JT_INVALID; + return; + } + + return; + } + else + if (PyArray_IsScalar(obj, Integer)) + { + PyObject *exc; + + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), PyArray_DescrFromType(NPY_LONG)); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) + { + PRINTMARK(); + tc->type = JT_INVALID; + return; + } + + return; + } + else + if (PyString_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8; + return; + } + else + if (PyUnicode_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyUnicodeToUTF8; tc->type = JT_UTF8; + return; + } + else + if (PyFloat_Check(obj)) + { + PRINTMARK(); + double val = PyFloat_AS_DOUBLE (obj); + if (npy_isnan(val) || npy_isinf(val)) + { + tc->type = JT_NULL; + } + else + { + pc->PyTypeToJSON = PyFloatToDOUBLE; tc->type = JT_DOUBLE; + } + return; + } + else + if (PyArray_IsScalar(obj, Float)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyFloatToDOUBLE; tc->type = JT_DOUBLE; + return; + } + else + if (PyArray_IsScalar(obj, Half)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyHalfToDOUBLE; tc->type = JT_DOUBLE; + return; + } + else + if (PyDateTime_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyDateTimeToINT64; tc->type = JT_LONG; + return; + } + else + if (PyDate_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyDateToINT64; tc->type = JT_LONG; + return; + } + else + if (obj == Py_None) + { + PRINTMARK(); + tc->type = JT_NULL; + return; + } + + +ISITERABLE: + + if (PyDict_Check(obj)) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + Py_INCREF(obj); + + return; + } + else + if (PyList_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } + else + if (PyTuple_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_index)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; + } + + PRINTMARK(); + tc->type = JT_ARRAY; + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + 
} + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_series)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + enc->outputFormat = RECORDS; // for contained index + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; + pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->columnLabelsLen = PyArray_SIZE(obj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + tc->type = JT_INVALID; + return; + } + } + else + { + PRINTMARK(); + tc->type = JT_ARRAY; + } + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyArray_Check(obj)) + { + if (enc->npyCtxtPassthru) + { + PRINTMARK(); + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + enc->npyCtxtPassthru = NULL; + return; + } + + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_dataframe)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + enc->outputFormat = RECORDS; // for contained index and series + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; + } + + PRINTMARK(); + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + if (enc->outputFormat == VALUES) + { + PRINTMARK(); + tc->type = JT_ARRAY; + } + else + if (enc->outputFormat == RECORDS) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + tc->type = JT_INVALID; + return; + } + } + else + if (enc->outputFormat == INDEX) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->rowLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); + if (!pc->rowLabels) + { + tc->type = JT_INVALID; + return; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + tc->type = JT_INVALID; + return; + } + } + else + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->rowLabelsLen = 
PyArray_DIM(pc->newObj, 1); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); + if (!pc->rowLabels) + { + tc->type = JT_INVALID; + return; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + tc->type = JT_INVALID; + return; + } + pc->transpose = 1; + } + return; + } + + + toDictFunc = PyObject_GetAttrString(obj, "toDict"); + + if (toDictFunc) + { + PyObject* tuple = PyTuple_New(0); + PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); + + if (toDictResult == NULL) + { + PyErr_Clear(); + tc->type = JT_NULL; + return; + } + + if (!PyDict_Check(toDictResult)) + { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; + return; + } + + PyErr_Clear(); + + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + + return; +} + + +void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) +{ + Py_XDECREF(GET_TC(tc)->newObj); + NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); + NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen); +} + +const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen) +{ + return GET_TC(tc)->PyTypeToJSON (obj, tc, NULL, _outLen); +} + +JSINT64 Object_getLongValue(JSOBJ obj, JSONTypeContext *tc) +{ + JSINT64 ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + + return ret; +} + +JSINT32 Object_getIntValue(JSOBJ obj, JSONTypeContext *tc) +{ + JSINT32 ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; +} + + +double Object_getDoubleValue(JSOBJ obj, JSONTypeContext *tc) +{ + double ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; +} + +static void Object_releaseObject(JSOBJ _obj) +{ + Py_DECREF( (PyObject *) _obj); +} + + + +void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->iterBegin(obj, tc); +} + +int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->iterNext(obj, tc); +} + +void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->iterEnd(obj, tc); +} + +JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->iterGetValue(obj, tc); +} + +char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return GET_TC(tc)->iterGetName(obj, tc, outLen); +} + + +PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "obj", "ensure_ascii", "double_precision", "orient", NULL}; + + char buffer[65536]; + char *ret; + PyObject *newobj; + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + char *sOrient = NULL; + int idoublePrecision = 5; // default double precision setting + + PyObjectEncoder pyEncoder = + { + { + Object_beginTypeContext, //void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + Object_endTypeContext, //void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + Object_getStringValue, //const char 
*(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); + Object_getLongValue, //JSLONG (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + Object_getIntValue, //JSLONG (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + Object_getDoubleValue, //double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + Object_iterBegin, //JSPFN_ITERBEGIN iterBegin; + Object_iterNext, //JSPFN_ITERNEXT iterNext; + Object_iterEnd, //JSPFN_ITEREND iterEnd; + Object_iterGetValue, //JSPFN_ITERGETVALUE iterGetValue; + Object_iterGetName, //JSPFN_ITERGETNAME iterGetName; + Object_releaseObject, //void (*releaseValue)(JSONTypeContext *ti); + PyObject_Malloc, //JSPFN_MALLOC malloc; + PyObject_Realloc, //JSPFN_REALLOC realloc; + PyObject_Free, //JSPFN_FREE free; + -1, //recursionMax + idoublePrecision, + 1, //forceAscii + } + }; + JSONObjectEncoder* encoder = (JSONObjectEncoder*) &pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.outputFormat = COLUMNS; + + PRINTMARK(); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Ois", kwlist, &oinput, &oensureAscii, &idoublePrecision, &sOrient)) + { + return NULL; + } + + if (sOrient != NULL) + { + if (strcmp(sOrient, "records") == 0) + { + pyEncoder.outputFormat = RECORDS; + } + else + if (strcmp(sOrient, "index") == 0) + { + pyEncoder.outputFormat = INDEX; + } + else + if (strcmp(sOrient, "split") == 0) + { + pyEncoder.outputFormat = SPLIT; + } + else + if (strcmp(sOrient, "values") == 0) + { + pyEncoder.outputFormat = VALUES; + } + else + if (strcmp(sOrient, "columns") != 0) + { + PyErr_Format (PyExc_ValueError, "Invalid value '%s' for option 'orient'", sOrient); + return NULL; + } + } + + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) + { + encoder->forceASCII = 0; + } + + encoder->doublePrecision = idoublePrecision; + + PRINTMARK(); + ret = JSON_EncodeObject (oinput, encoder, buffer, sizeof (buffer)); + PRINTMARK(); + + if (PyErr_Occurred()) + { + return NULL; + } + + if (encoder->errorMsg) + { + if (ret != buffer) + { + encoder->free (ret); + } + + PyErr_Format (PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + newobj = PyString_FromString (ret); + + if (ret != buffer) + { + encoder->free (ret); + } + + PRINTMARK(); + + return newobj; +} + +PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs) +{ + PyObject *data; + PyObject *file; + PyObject *string; + PyObject *write; + PyObject *argtuple; + + PRINTMARK(); + + if (!PyArg_ParseTuple (args, "OO", &data, &file)) { + return NULL; + } + + if (!PyObject_HasAttrString (file, "write")) + { + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + write = PyObject_GetAttrString (file, "write"); + + if (!PyCallable_Check (write)) { + Py_XDECREF(write); + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + argtuple = PyTuple_Pack(1, data); + + string = objToJSON (self, argtuple, kwargs); + + if (string == NULL) + { + Py_XDECREF(write); + Py_XDECREF(argtuple); + return NULL; + } + + Py_XDECREF(argtuple); + + argtuple = PyTuple_Pack (1, string); + if (argtuple == NULL) + { + Py_XDECREF(write); + return NULL; + } + if (PyObject_CallObject (write, argtuple) == NULL) + { + Py_XDECREF(write); + Py_XDECREF(argtuple); + return NULL; + } + + Py_XDECREF(write); + Py_DECREF(argtuple); + Py_XDECREF(string); + + PRINTMARK(); + + Py_RETURN_NONE; + + +} + diff --git a/pandas/src/ujson/python/ujson.c b/pandas/src/ujson/python/ujson.c new file mode 100644 index 0000000000000..21f7ba8b106cf --- /dev/null +++ 
b/pandas/src/ujson/python/ujson.c @@ -0,0 +1,41 @@ +#include +#include "version.h" + +/* objToJSON */ +PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs); +void initObjToJSON(void); + +/* JSONToObj */ +PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs); + +/* objToJSONFile */ +PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs); + +/* JSONFileToObj */ +PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs); + + +static PyMethodDef ujsonMethods[] = { + {"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8. Pass in double_precision to alter the maximum digit precision with doubles"}, + {"decode", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure"}, + {"dumps", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"}, + {"loads", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure"}, + {"dump", (PyCFunction) objToJSONFile, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON file. Use ensure_ascii=false to output UTF-8"}, + {"load", (PyCFunction) JSONFileToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as file to dict object structure"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + + +PyMODINIT_FUNC +init_ujson(void) +{ + PyObject *module; + PyObject *version_string; + + initObjToJSON(); + module = Py_InitModule("_ujson", ujsonMethods); + + version_string = PyString_FromString (UJSON_VERSION); + PyModule_AddObject (module, "__version__", version_string); +} diff --git a/pandas/src/ujson/python/version.h b/pandas/src/ujson/python/version.h new file mode 100644 index 0000000000000..9449441411192 --- /dev/null +++ b/pandas/src/ujson/python/version.h @@ -0,0 +1 @@ +#define UJSON_VERSION "1.18" diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b21bd09957bd7..86a64bdfc4002 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1924,6 +1924,143 @@ def test_to_dict(self): for k2, v2 in v.iteritems(): self.assertEqual(v2, recons_data[k][k2]) + def test_from_json_to_json(self): + + def _check_orient(df, orient, dtype=None, numpy=True): + df = df.sort() + dfjson = df.to_json(orient=orient) + unser = DataFrame.from_json(dfjson, orient=orient, dtype=dtype, + numpy=numpy) + unser = unser.sort() + mktimestamp = datetime.fromtimestamp + if df.index.dtype == np.datetime64: + unser.index = [mktimestamp(float(d)) for d in unser.index] + if orient == "records": + # index is not captured in this orientation + assert_almost_equal(df.values, unser.values) + self.assert_(df.columns.equals(unser.columns)) + elif orient == "values": + # index and cols are not captured in this orientation + assert_almost_equal(df.values, unser.values) + elif orient == "split": + # index and col labels might not be strings + unser.index = [str(i) for i in unser.index] + unser.columns = [str(i) for i in unser.columns] + unser = unser.sort() + assert_almost_equal(df.values, unser.values) + else: + assert_frame_equal(df, unser) + + def _check_all_orients(df, dtype=None): + _check_orient(df, "columns", dtype=dtype) + _check_orient(df, "records", dtype=dtype) + _check_orient(df, "split", dtype=dtype) + _check_orient(df, "index", dtype=dtype) + _check_orient(df, "values", dtype=dtype) + + 
_check_orient(df, "columns", dtype=dtype, numpy=False) + _check_orient(df, "records", dtype=dtype, numpy=False) + _check_orient(df, "split", dtype=dtype, numpy=False) + _check_orient(df, "index", dtype=dtype, numpy=False) + _check_orient(df, "values", dtype=dtype, numpy=False) + + # basic + _check_all_orients(self.frame) + self.assertEqual(self.frame.to_json(), + self.frame.to_json(orient="columns")) + + _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) + + # big one + # index and columns are strings as all unserialised JSON object keys + # are assumed to be strings + biggie = DataFrame(np.zeros((200, 4)), + columns=[str(i) for i in range(4)], + index=[str(i) for i in range(200)]) + _check_all_orients(biggie) + + # dtypes + _check_all_orients(DataFrame(biggie, dtype=np.float64), + dtype=np.float64) + _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int) + _check_all_orients(DataFrame(biggie, dtype=' Date: Thu, 10 May 2012 10:12:51 -0400 Subject: [PATCH 002/114] REF: working toward #1150, broke apart Cython module into generated _algos extension --- pandas/core/common.py | 79 +-- pandas/core/index.py | 23 +- pandas/src/datetime.pyx | 175 ++++--- pandas/src/engines.pyx | 42 +- pandas/src/generate_code.py | 50 ++ pandas/src/generated.pyx | 55 +++ pandas/src/period.c | 609 +++++++++++++----------- pandas/src/period.h | 55 +-- pandas/src/tseries.pyx | 1 - pandas/tseries/index.py | 9 +- pandas/tseries/period.py | 52 +- pandas/tseries/tests/test_period.py | 31 +- pandas/tseries/tests/test_timeseries.py | 1 + setup.py | 9 +- 14 files changed, 710 insertions(+), 481 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index cad17087a7622..bc9873b6c8f43 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -18,6 +18,7 @@ def next(x): from numpy.lib.format import read_array, write_array import numpy as np +import pandas._algos as _algos import pandas._tseries as lib from pandas.util import py3compat import codecs @@ -111,17 +112,17 @@ def _unpickle_array(bytes): def _take_1d_datetime(arr, indexer, out, fill_value=np.nan): view = arr.view(np.int64) outview = out.view(np.int64) - lib.take_1d_bool(view, indexer, outview, fill_value=fill_value) + _algos.take_1d_bool(view, indexer, outview, fill_value=fill_value) def _take_2d_axis0_datetime(arr, indexer, out, fill_value=np.nan): view = arr.view(np.int64) outview = out.view(np.int64) - lib.take_1d_bool(view, indexer, outview, fill_value=fill_value) + _algos.take_1d_bool(view, indexer, outview, fill_value=fill_value) def _take_2d_axis1_datetime(arr, indexer, out, fill_value=np.nan): view = arr.view(np.uint8) outview = out.view(np.uint8) - lib.take_1d_bool(view, indexer, outview, fill_value=fill_value) + _algos.take_1d_bool(view, indexer, outview, fill_value=fill_value) def _view_wrapper(f, wrap_dtype, na_override=None): def wrapper(arr, indexer, out, fill_value=np.nan): @@ -134,42 +135,42 @@ def wrapper(arr, indexer, out, fill_value=np.nan): _take1d_dict = { - 'float64' : lib.take_1d_float64, - 'int32' : lib.take_1d_int32, - 'int64' : lib.take_1d_int64, - 'object' : lib.take_1d_object, - 'bool' : _view_wrapper(lib.take_1d_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(lib.take_1d_int64, np.int64, + 'float64' : _algos.take_1d_float64, + 'int32' : _algos.take_1d_int32, + 'int64' : _algos.take_1d_int64, + 'object' : _algos.take_1d_object, + 'bool' : _view_wrapper(_algos.take_1d_bool, np.uint8), + 'datetime64[us]' : _view_wrapper(_algos.take_1d_int64, np.int64, na_override=lib.NaT), } 
_take2d_axis0_dict = { - 'float64' : lib.take_2d_axis0_float64, - 'int32' : lib.take_2d_axis0_int32, - 'int64' : lib.take_2d_axis0_int64, - 'object' : lib.take_2d_axis0_object, - 'bool' : _view_wrapper(lib.take_2d_axis0_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(lib.take_2d_axis0_int64, np.int64, + 'float64' : _algos.take_2d_axis0_float64, + 'int32' : _algos.take_2d_axis0_int32, + 'int64' : _algos.take_2d_axis0_int64, + 'object' : _algos.take_2d_axis0_object, + 'bool' : _view_wrapper(_algos.take_2d_axis0_bool, np.uint8), + 'datetime64[us]' : _view_wrapper(_algos.take_2d_axis0_int64, np.int64, na_override=lib.NaT), } _take2d_axis1_dict = { - 'float64' : lib.take_2d_axis1_float64, - 'int32' : lib.take_2d_axis1_int32, - 'int64' : lib.take_2d_axis1_int64, - 'object' : lib.take_2d_axis1_object, - 'bool' : _view_wrapper(lib.take_2d_axis1_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(lib.take_2d_axis1_int64, np.int64, + 'float64' : _algos.take_2d_axis1_float64, + 'int32' : _algos.take_2d_axis1_int32, + 'int64' : _algos.take_2d_axis1_int64, + 'object' : _algos.take_2d_axis1_object, + 'bool' : _view_wrapper(_algos.take_2d_axis1_bool, np.uint8), + 'datetime64[us]' : _view_wrapper(_algos.take_2d_axis1_int64, np.int64, na_override=lib.NaT), } _take2d_multi_dict = { - 'float64' : lib.take_2d_multi_float64, - 'int32' : lib.take_2d_multi_int32, - 'int64' : lib.take_2d_multi_int64, - 'object' : lib.take_2d_multi_object, - 'bool' : _view_wrapper(lib.take_2d_multi_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(lib.take_2d_multi_int64, np.int64, + 'float64' : _algos.take_2d_multi_float64, + 'int32' : _algos.take_2d_multi_int32, + 'int64' : _algos.take_2d_multi_int64, + 'object' : _algos.take_2d_multi_object, + 'bool' : _view_wrapper(_algos.take_2d_multi_bool, np.uint8), + 'datetime64[us]' : _view_wrapper(_algos.take_2d_multi_int64, np.int64, na_override=lib.NaT), } @@ -366,18 +367,18 @@ def wrapper(arr, mask, limit=None): f(view, mask, limit=limit) return wrapper -_pad_1d_datetime = _interp_wrapper(lib.pad_inplace_int64, np.int64) -_pad_2d_datetime = _interp_wrapper(lib.pad_2d_inplace_int64, np.int64) -_backfill_1d_datetime = _interp_wrapper(lib.backfill_inplace_int64, np.int64) -_backfill_2d_datetime = _interp_wrapper(lib.backfill_2d_inplace_int64, np.int64) +_pad_1d_datetime = _interp_wrapper(_algos.pad_inplace_int64, np.int64) +_pad_2d_datetime = _interp_wrapper(_algos.pad_2d_inplace_int64, np.int64) +_backfill_1d_datetime = _interp_wrapper(_algos.backfill_inplace_int64, np.int64) +_backfill_2d_datetime = _interp_wrapper(_algos.backfill_2d_inplace_int64, np.int64) def pad_1d(values, limit=None): if is_float_dtype(values): - _method = lib.pad_inplace_float64 + _method = _algos.pad_inplace_float64 elif is_datetime64_dtype(values): _method = _pad_1d_datetime elif values.dtype == np.object_: - _method = lib.pad_inplace_object + _method = _algos.pad_inplace_object else: # pragma: no cover raise ValueError('Invalid dtype for padding') @@ -385,11 +386,11 @@ def pad_1d(values, limit=None): def backfill_1d(values, limit=None): if is_float_dtype(values): - _method = lib.backfill_inplace_float64 + _method = _algos.backfill_inplace_float64 elif is_datetime64_dtype(values): _method = _backfill_1d_datetime elif values.dtype == np.object_: - _method = lib.backfill_inplace_object + _method = _algos.backfill_inplace_object else: # pragma: no cover raise ValueError('Invalid dtype for padding') @@ -397,11 +398,11 @@ def backfill_1d(values, limit=None): def pad_2d(values, limit=None): if 
is_float_dtype(values): - _method = lib.pad_2d_inplace_float64 + _method = _algos.pad_2d_inplace_float64 elif is_datetime64_dtype(values): _method = _pad_2d_datetime elif values.dtype == np.object_: - _method = lib.pad_2d_inplace_object + _method = _algos.pad_2d_inplace_object else: # pragma: no cover raise ValueError('Invalid dtype for padding') @@ -409,11 +410,11 @@ def pad_2d(values, limit=None): def backfill_2d(values, limit=None): if is_float_dtype(values): - _method = lib.backfill_2d_inplace_float64 + _method = _algos.backfill_2d_inplace_float64 elif is_datetime64_dtype(values): _method = _backfill_2d_datetime elif values.dtype == np.object_: - _method = lib.backfill_2d_inplace_object + _method = _algos.backfill_2d_inplace_object else: # pragma: no cover raise ValueError('Invalid dtype for padding') diff --git a/pandas/core/index.py b/pandas/core/index.py index d0b9ef4fbde13..dee1764728b92 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -12,6 +12,7 @@ from pandas.util import py3compat import pandas.core.common as com import pandas._tseries as lib +import pandas._algos as _algos __all__ = ['Index'] @@ -56,11 +57,11 @@ class Index(np.ndarray): _join_precedence = 1 # Cython methods - _groupby = lib.groupby_object - _arrmap = lib.arrmap_object - _left_indexer = lib.left_join_indexer_object - _inner_indexer = lib.inner_join_indexer_object - _outer_indexer = lib.outer_join_indexer_object + _groupby = _algos.groupby_object + _arrmap = _algos.arrmap_object + _left_indexer = _algos.left_join_indexer_object + _inner_indexer = _algos.inner_join_indexer_object + _outer_indexer = _algos.outer_join_indexer_object _box_scalars = False @@ -1067,11 +1068,11 @@ def copy(self, order='C'): class Int64Index(Index): - _groupby = lib.groupby_int64 - _arrmap = lib.arrmap_int64 - _left_indexer = lib.left_join_indexer_int64 - _inner_indexer = lib.inner_join_indexer_int64 - _outer_indexer = lib.outer_join_indexer_int64 + _groupby = _algos.groupby_int64 + _arrmap = _algos.arrmap_int64 + _left_indexer = _algos.left_join_indexer_int64 + _inner_indexer = _algos.inner_join_indexer_int64 + _outer_indexer = _algos.outer_join_indexer_int64 _engine_type = lib.Int64Engine @@ -1378,7 +1379,7 @@ def lexsort_depth(self): return self.nlevels else: return 0 - + int64_labels = [com._ensure_int64(lab) for lab in self.labels] for k in range(self.nlevels, 0, -1): if lib.is_lexsorted(int64_labels[:k]): diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 42bc2a8fd71f0..823439b71ffc1 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -1388,55 +1388,45 @@ def period_strftime(int64_t value, int freq, int64_t mult, object fmt): ctypedef int (*accessor)(int64_t ordinal, int freq) except -1 -cdef int apply_accessor(accessor func, int64_t value, int freq, - int64_t mult) except -1: +def get_period_field(int code, int64_t value, int freq, + int64_t mult): + cdef accessor f = _get_accessor_func(code) value = remove_mult(value, mult) - return func(value, freq) - -cpdef int get_period_year(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pyear, value, freq, mult) - -cpdef int get_period_qyear(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pqyear, value, freq, mult) + return f(value, freq) -cpdef int get_period_quarter(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pquarter, value, freq, mult) - -cpdef int get_period_month(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pmonth, value, 
freq, mult) - -cpdef int get_period_day(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pday, value, freq, mult) - -cpdef int get_period_hour(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&phour, value, freq, mult) +def get_period_field_arr(int code, ndarray[int64_t] arr, + int freq, int64_t mult): + cdef: + Py_ssize_t i, sz + ndarray[int64_t] out + accessor f -cpdef int get_period_minute(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pminute, value, freq, mult) + f = _get_accessor_func(code) -cpdef int get_period_second(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&psecond, value, freq, mult) + sz = len(arr) + out = np.empty(sz, dtype=np.int64) -cpdef int get_period_dow(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pday_of_week, value, freq, mult) + for i in range(sz): + out[i] = f(remove_mult(arr[i], mult), freq) -cpdef int get_period_week(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pweek, value, freq, mult) + return out -cpdef int get_period_weekday(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pweekday, value, freq, mult) -cpdef int get_period_doy(int64_t value, int freq, int64_t mult) except -1: - return apply_accessor(&pday_of_year, value, freq, mult) +cdef int apply_accessor(accessor func, int64_t value, int freq, + int64_t mult) except -1: + value = remove_mult(value, mult) + return func(value, freq) # same but for arrays -cdef ndarray[int64_t] apply_accessor_arr(accessor func, - ndarray[int64_t] arr, +cdef ndarray[int64_t] apply_accessor_arr(accessor func, ndarray[int64_t] arr, int freq, int64_t mult): cdef: Py_ssize_t i, sz ndarray[int64_t] out + # accessor f + + # f = _get_accessor_func(code) sz = len(arr) out = np.empty(sz, dtype=np.int64) @@ -1447,41 +1437,106 @@ cdef ndarray[int64_t] apply_accessor_arr(accessor func, return out -def get_period_year_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pyear, arr, freq, mult) -def get_period_qyear_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pqyear, arr, freq, mult) +cdef accessor _get_accessor_func(int code): + if code == 0: + return &pyear + elif code == 1: + return &pqyear + elif code == 2: + return &pquarter + elif code == 3: + return &pmonth + elif code == 4: + return &pday + elif code == 5: + return &phour + elif code == 6: + return &pminute + elif code == 7: + return &psecond + elif code == 8: + return &pweek + elif code == 9: + return &pday_of_year + elif code == 10: + return &pweekday + else: + raise ValueError('Unrecognized code: %s' % code) + + +# def get_period_year_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pyear, arr, freq, mult) + +# def get_period_qyear_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pqyear, arr, freq, mult) + +# def get_period_quarter_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pquarter, arr, freq, mult) + +# def get_period_month_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pmonth, arr, freq, mult) + +# def get_period_day_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pday, arr, freq, mult) + +# def get_period_hour_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(phour, arr, freq, mult) + +# def get_period_minute_arr(ndarray[int64_t] 
arr, int freq, int64_t mult): +# return apply_accessor_arr(pminute, arr, freq, mult) + +# def get_period_second_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(psecond, arr, freq, mult) + +# def get_period_dow_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pday_of_week, arr, freq, mult) + +# def get_period_week_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pweek, arr, freq, mult) + +# def get_period_weekday_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pweekday, arr, freq, mult) + +# def get_period_doy_arr(ndarray[int64_t] arr, int freq, int64_t mult): +# return apply_accessor_arr(pday_of_year, arr, freq, mult) + +# def get_abs_time(freq, dailyDate, originalDate): +# return getAbsTime(freq, dailyDate, originalDate) + + +# cpdef int get_period_year(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pyear, value, freq, mult) -def get_period_quarter_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pquarter, arr, freq, mult) +# cpdef int get_period_qyear(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pqyear, value, freq, mult) -def get_period_month_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pmonth, arr, freq, mult) +# cpdef int get_period_quarter(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pquarter, value, freq, mult) -def get_period_day_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pday, arr, freq, mult) +# cpdef int get_period_month(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pmonth, value, freq, mult) -def get_period_hour_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&phour, arr, freq, mult) +# cpdef int get_period_day(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pday, value, freq, mult) -def get_period_minute_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pminute, arr, freq, mult) +# cpdef int get_period_hour(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(phour, value, freq, mult) -def get_period_second_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&psecond, arr, freq, mult) +# cpdef int get_period_minute(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pminute, value, freq, mult) -def get_period_dow_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pday_of_week, arr, freq, mult) +# cpdef int get_period_second(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(psecond, value, freq, mult) -def get_period_week_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pweek, arr, freq, mult) +# cpdef int get_period_dow(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pday_of_week, value, freq, mult) -def get_period_weekday_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pweekday, arr, freq, mult) +# cpdef int get_period_week(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pweek, value, freq, mult) -def get_period_doy_arr(ndarray[int64_t] arr, int freq, int64_t mult): - return apply_accessor_arr(&pday_of_year, arr, freq, mult) +# cpdef int get_period_weekday(int64_t value, int freq, int64_t mult) except -1: +# 
return apply_accessor(pweekday, value, freq, mult) -def get_abs_time(freq, dailyDate, originalDate): - return getAbsTime(freq, dailyDate, originalDate) +# cpdef int get_period_doy(int64_t value, int freq, int64_t mult) except -1: +# return apply_accessor(pday_of_year, value, freq, mult) diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index 07a547de8da15..df92cce1c3efa 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -12,7 +12,7 @@ cimport util import numpy as np -# import _tseries +import _algos # include "hashtable.pyx" @@ -243,14 +243,14 @@ cdef class Int64Engine(IndexEngine): return Int64HashTable(n) def _call_monotonic(self, values): - return is_monotonic_int64(values) + return _algos.is_monotonic_int64(values) def get_pad_indexer(self, other, limit=None): - return pad_int64(self._get_index_values(), other, + return _algos.pad_int64(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): - return backfill_int64(self._get_index_values(), other, + return _algos.backfill_int64(self._get_index_values(), other, limit=limit) cdef _get_bool_indexer(self, object val): @@ -292,26 +292,26 @@ cdef class Float64Engine(IndexEngine): return Float64HashTable(n) def _call_monotonic(self, values): - return is_monotonic_float64(values) + return _algos.is_monotonic_float64(values) def get_pad_indexer(self, other, limit=None): - return pad_float64(self._get_index_values(), other, + return _algos.pad_float64(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): - return backfill_float64(self._get_index_values(), other, + return _algos.backfill_float64(self._get_index_values(), other, limit=limit) _pad_functions = { - 'object' : pad_object, - 'int64' : pad_int64, - 'float64' : pad_float64 + 'object' : _algos.pad_object, + 'int64' : _algos.pad_int64, + 'float64' : _algos.pad_float64 } _backfill_functions = { - 'object': backfill_object, - 'int64': backfill_int64, - 'float64': backfill_float64 + 'object': _algos.backfill_object, + 'int64': _algos.backfill_int64, + 'float64': _algos.backfill_float64 } cdef class ObjectEngine(IndexEngine): @@ -322,14 +322,14 @@ cdef class ObjectEngine(IndexEngine): return PyObjectHashTable(n) def _call_monotonic(self, values): - return is_monotonic_object(values) + return _algos.is_monotonic_object(values) def get_pad_indexer(self, other, limit=None): - return pad_object(self._get_index_values(), other, + return _algos.pad_object(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): - return backfill_object(self._get_index_values(), other, + return _algos.backfill_object(self._get_index_values(), other, limit=limit) @@ -353,7 +353,7 @@ cdef class DatetimeEngine(Int64Engine): return self.index_weakref().values.view('i8') def _call_monotonic(self, values): - return is_monotonic_int64(values) + return _algos.is_monotonic_int64(values) cpdef get_loc(self, object val): if is_definitely_invalid_key(val): @@ -404,15 +404,15 @@ cdef class DatetimeEngine(Int64Engine): if other.dtype != 'M8': return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') - return pad_int64(self._get_index_values(), other, - limit=limit) + return _algos.pad_int64(self._get_index_values(), other, + limit=limit) def get_backfill_indexer(self, other, limit=None): if other.dtype != 'M8': return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') - return backfill_int64(self._get_index_values(), other, - 
limit=limit) + return _algos.backfill_int64(self._get_index_values(), other, + limit=limit) # ctypedef fused idxvalue_t: diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index ee151b6ebc8ef..7650cdb1109da 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -1,5 +1,51 @@ from pandas.util.py3compat import StringIO +header = """ +cimport numpy as np +cimport cython + +from numpy cimport * + +from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, + PyDict_Contains, PyDict_Keys, + Py_INCREF, PyTuple_SET_ITEM, + PyTuple_SetItem, + PyTuple_New) +from cpython cimport PyFloat_Check +cimport cpython + +import numpy as np +isnan = np.isnan +cdef double NaN = np.NaN +cdef double nan = NaN + +from datetime import datetime as pydatetime + +# this is our datetime.pxd +from datetime cimport * + +from khash cimport * + +cdef inline int int_max(int a, int b): return a if a >= b else b +cdef inline int int_min(int a, int b): return a if a <= b else b + +ctypedef unsigned char UChar + +cimport util +from util cimport is_array, _checknull, _checknan + +cdef extern from "math.h": + double sqrt(double x) + double fabs(double) + +# import datetime C API +PyDateTime_IMPORT + +# initialize numpy +import_array() +import_ufunc() +""" + take_1d_template = """@cython.wraparound(False) @cython.boundscheck(False) def take_1d_%(name)s(ndarray[%(c_type)s] values, @@ -540,6 +586,8 @@ def arrmap_%(name)s(ndarray[%(c_type)s] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) + from _tseries import maybe_convert_objects + for i in range(length): result[i] = func(index[i]) @@ -851,6 +899,8 @@ def generate_from_template(template, ndim=1, exclude=None): def generate_take_cython_file(path='generated.pyx'): with open(path, 'w') as f: + print >> f, header + for template in templates_1d: print >> f, generate_from_template(template) diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index ed5f12791abf4..44442210b7575 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -1,3 +1,48 @@ + +cimport numpy as np +cimport cython + +from numpy cimport * + +from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, + PyDict_Contains, PyDict_Keys, + Py_INCREF, PyTuple_SET_ITEM, + PyTuple_SetItem, + PyTuple_New) +from cpython cimport PyFloat_Check +cimport cpython + +import numpy as np +isnan = np.isnan +cdef double NaN = np.NaN +cdef double nan = NaN + +from datetime import datetime as pydatetime + +# this is our datetime.pxd +from datetime cimport * + +from khash cimport * + +cdef inline int int_max(int a, int b): return a if a >= b else b +cdef inline int int_min(int a, int b): return a if a <= b else b + +ctypedef unsigned char UChar + +cimport util +from util cimport is_array, _checknull, _checknan + +cdef extern from "math.h": + double sqrt(double x) + double fabs(double) + +# import datetime C API +PyDateTime_IMPORT + +# initialize numpy +import_array() +import_ufunc() + @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_float64(ndarray[float64_t] index): @@ -1751,6 +1796,8 @@ def arrmap_float64(ndarray[float64_t] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) + from _tseries import maybe_convert_objects + for i in range(length): result[i] = func(index[i]) @@ -1764,6 +1811,8 @@ def arrmap_object(ndarray[object] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) + from _tseries import maybe_convert_objects + for i in 
range(length): result[i] = func(index[i]) @@ -1777,6 +1826,8 @@ def arrmap_int32(ndarray[int32_t] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) + from _tseries import maybe_convert_objects + for i in range(length): result[i] = func(index[i]) @@ -1790,6 +1841,8 @@ def arrmap_int64(ndarray[int64_t] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) + from _tseries import maybe_convert_objects + for i in range(length): result[i] = func(index[i]) @@ -1803,6 +1856,8 @@ def arrmap_bool(ndarray[uint8_t] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) + from _tseries import maybe_convert_objects + for i in range(length): result[i] = func(index[i]) diff --git a/pandas/src/period.c b/pandas/src/period.c index d7c3260f71866..ee44720a51810 100644 --- a/pandas/src/period.c +++ b/pandas/src/period.c @@ -1,6 +1,5 @@ #include "period.h" -#include "limits.h" -// #include "numpy/ndarraytypes.h" + /* * Borrowed and derived code from scikits.timeseries that we will expose via @@ -29,7 +28,7 @@ static int days_in_month[2][12] = { }; /* Return 1/0 iff year points to a leap year in calendar. */ -static int dInfoCalc_Leapyear(int64_t year, int calendar) +static int dInfoCalc_Leapyear(npy_int64 year, int calendar) { if (calendar == GREGORIAN_CALENDAR) { return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); @@ -39,7 +38,7 @@ static int dInfoCalc_Leapyear(int64_t year, int calendar) } /* Return the day of the week for the given absolute date. */ -static int dInfoCalc_DayOfWeek(int64_t absdate) +static int dInfoCalc_DayOfWeek(npy_int64 absdate) { int day_of_week; @@ -61,7 +60,7 @@ static int monthToQuarter(int month) { return ((month-1)/3)+1; } using the Gregorian Epoch) value by two days because the Epoch (0001-01-01) in the Julian calendar lies 2 days before the Epoch in the Gregorian calendar. */ -static int dInfoCalc_YearOffset(int64_t year, int calendar) +static int dInfoCalc_YearOffset(npy_int64 year, int calendar) { year--; if (calendar == GREGORIAN_CALENDAR) { @@ -93,7 +92,8 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, /* Calculate the absolute date */ { int leap; - int64_t yearoffset,absdate; + npy_int64 absdate; + int yearoffset; /* Range check */ Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366), @@ -173,19 +173,18 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, than with this iterative approach... 
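(A rough worked example of the iterative search, for illustration only and assuming
ORD_OFFSET used elsewhere in this patch is the absdate of 1970-01-01, i.e. 719163:
starting from absdate 719163, the first guess is (int)(719163 / 365.2425) = 1969;
dInfoCalc_YearOffset(1969) is 718797, leaving a day offset of 366, which exceeds the
365 days of the non-leap year 1969, so the correction loop advances the year to 1970,
whose offset 719162 leaves day-of-year 1, that is, 1970-01-01.)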
*/ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, - int64_t absdate, - int calendar) + npy_int64 absdate, int calendar) { - register int64_t year; - int64_t yearoffset; + register npy_int64 year; + npy_int64 yearoffset; int leap,dayoffset; int *monthoffset; /* Approximate year */ if (calendar == GREGORIAN_CALENDAR) { - year = (int64_t)(((double)absdate) / 365.2425); + year = (npy_int64)(((double)absdate) / 365.2425); } else if (calendar == JULIAN_CALENDAR) { - year = (int64_t)(((double)absdate) / 365.25); + year = (npy_int64)(((double)absdate) / 365.25); } else { Py_Error(PyExc_ValueError, "unknown calendar"); } @@ -194,7 +193,7 @@ int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, /* Apply corrections to reach the correct year */ while (1) { /* Calculate the year offset */ - yearoffset = dInfoCalc_YearOffset(year,calendar); + yearoffset = dInfoCalc_YearOffset(year, calendar); if (PyErr_Occurred()) goto onError; @@ -254,28 +253,27 @@ int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, // helpers for frequency conversion routines // -static int64_t DtoB_weekday(int64_t fromDate) { - return (((fromDate) / 7) * 5) + (fromDate)%7; +static npy_int64 DtoB_weekday(npy_int64 ordinal) { + return (((ordinal) / 7) * 5) + (ordinal) % 7; } -static int64_t DtoB_WeekendToMonday(int64_t absdate, int day_of_week) { - +static npy_int64 DtoB_WeekendToMonday(npy_int64 ordinal, int day_of_week) { if (day_of_week > 4) { //change to Monday after weekend - absdate += (7 - day_of_week); + ordinal += (7 - day_of_week); } - return DtoB_weekday(absdate); + return DtoB_weekday(ordinal); } -static int64_t DtoB_WeekendToFriday(int64_t absdate, int day_of_week) { +static npy_int64 DtoB_WeekendToFriday(npy_int64 ordinal, int day_of_week) { if (day_of_week > 4) { //change to friday before weekend - absdate -= (day_of_week - 4); + ordinal -= (day_of_week - 4); } - return DtoB_weekday(absdate); + return DtoB_weekday(ordinal); } -static int64_t absdate_from_ymd(int y, int m, int d) { +static npy_int64 absdate_from_ymd(int y, int m, int d) { struct date_info tempDate; if (dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0, GREGORIAN_CALENDAR)) { return INT_ERR_CODE; @@ -285,20 +283,24 @@ static int64_t absdate_from_ymd(int y, int m, int d) { //************ FROM DAILY *************** -static int64_t asfreq_DtoA(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_DtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, fromDate, - GREGORIAN_CALENDAR)) return INT_ERR_CODE; - if (dinfo.month > af_info->to_a_year_end) { return (int64_t)(dinfo.year + 1); } - else { return (int64_t)(dinfo.year); } + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR)) return INT_ERR_CODE; + if (dinfo.month > af_info->to_a_year_end) { + return (npy_int64)(dinfo.year + 1 - BASE_YEAR); + } + else { + return (npy_int64)(dinfo.year - BASE_YEAR); + } } -static int64_t DtoQ_yq(int64_t fromDate, asfreq_info *af_info, - int *year, int *quarter) { +static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, + int *year, int *quarter) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, fromDate, - GREGORIAN_CALENDAR)) return INT_ERR_CODE; + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (af_info->to_q_year_end != 12) { dinfo.month -= af_info->to_q_year_end; if (dinfo.month <= 0) { dinfo.month += 12; } @@ 
-313,34 +315,34 @@ static int64_t DtoQ_yq(int64_t fromDate, asfreq_info *af_info, } -static int64_t asfreq_DtoQ(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_DtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) { int year, quarter; - if (DtoQ_yq(fromDate, af_info, &year, &quarter) == INT_ERR_CODE) { + if (DtoQ_yq(ordinal, af_info, &year, &quarter) == INT_ERR_CODE) { return INT_ERR_CODE; } - return (int64_t)((year - 1) * 4 + quarter); + return (npy_int64)((year - BASE_YEAR) * 4 + quarter - 1); } -static int64_t asfreq_DtoM(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_DtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, fromDate, GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, GREGORIAN_CALENDAR)) return INT_ERR_CODE; - return (int64_t)((dinfo.year - 1) * 12 + dinfo.month); + return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); } -static int64_t asfreq_DtoW(int64_t fromDate, char relation, asfreq_info *af_info) { - return (fromDate - (1 + af_info->to_week_end))/7 + 1; +static npy_int64 asfreq_DtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end))/7 + 1; } -static int64_t asfreq_DtoB(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_DtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, fromDate, - GREGORIAN_CALENDAR)) return INT_ERR_CODE; + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (relation == 'S') { return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); @@ -349,222 +351,241 @@ static int64_t asfreq_DtoB(int64_t fromDate, char relation, asfreq_info *af_info } } -static int64_t asfreq_DtoB_forConvert(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_DtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, fromDate, GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (dinfo.day_of_week > 4) { return INT_ERR_CODE; } else { - return DtoB_weekday(fromDate); + return DtoB_weekday(ordinal); } } // needed for getDateInfo function -static int64_t asfreq_DtoD(int64_t fromDate, char relation, asfreq_info *af_info) { return fromDate; } - -static int64_t asfreq_DtoHIGHFREQ(int64_t fromDate, char relation, int64_t periodsPerDay) { - if (fromDate >= HIGHFREQ_ORIG) { - if (relation == 'S') { return (fromDate - HIGHFREQ_ORIG)*(periodsPerDay) + 1; } - else { return (fromDate - HIGHFREQ_ORIG + 1)*(periodsPerDay); } +static npy_int64 asfreq_DtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { return ordinal; } + +static npy_int64 asfreq_DtoHIGHFREQ(npy_int64 ordinal, char relation, npy_int64 periodsPerDay) { + if (ordinal >= HIGHFREQ_ORIG) { + if (relation == 'S') { + return (ordinal - HIGHFREQ_ORIG)*(periodsPerDay) + 1; + } + else { + return (ordinal - HIGHFREQ_ORIG + 1)*(periodsPerDay); + } } else { return INT_ERR_CODE; } } -static int64_t asfreq_DtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoHIGHFREQ(fromDate, relation, 24); } -static int64_t asfreq_DtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return 
asfreq_DtoHIGHFREQ(fromDate, relation, 24*60); } -static int64_t asfreq_DtoS(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoHIGHFREQ(fromDate, relation, 24*60*60); } +static npy_int64 asfreq_DtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoHIGHFREQ(ordinal, relation, 24); } +static npy_int64 asfreq_DtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoHIGHFREQ(ordinal, relation, 24*60); } +static npy_int64 asfreq_DtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoHIGHFREQ(ordinal, relation, 24*60*60); } //************ FROM SECONDLY *************** -static int64_t asfreq_StoD(int64_t fromDate, char relation, asfreq_info *af_info) - { return (fromDate - 1)/(60*60*24) + HIGHFREQ_ORIG; } - -static int64_t asfreq_StoA(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoA(asfreq_StoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_StoQ(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoQ(asfreq_StoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_StoM(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoM(asfreq_StoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_StoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_StoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_StoB(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoB(asfreq_StoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_StoB_forConvert(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoB_forConvert(asfreq_StoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_StoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return (fromDate - 1)/60 + 1; } -static int64_t asfreq_StoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return (fromDate - 1)/(60*60) + 1; } +static npy_int64 asfreq_StoD(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return (ordinal - 1)/(60*60*24) + HIGHFREQ_ORIG; } + +static npy_int64 asfreq_StoA(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoA(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_StoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoQ(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_StoM(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoM(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_StoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_StoB(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoB(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_StoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoB_forConvert(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_StoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return (ordinal - 1)/60 + 1; } +static npy_int64 
asfreq_StoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return (ordinal - 1)/(60*60) + 1; } //************ FROM MINUTELY *************** -static int64_t asfreq_TtoD(int64_t fromDate, char relation, asfreq_info *af_info) - { return (fromDate - 1)/(60*24) + HIGHFREQ_ORIG; } - -static int64_t asfreq_TtoA(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoA(asfreq_TtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_TtoQ(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoQ(asfreq_TtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_TtoM(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoM(asfreq_TtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_TtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_TtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_TtoB(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoB(asfreq_TtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } - -static int64_t asfreq_TtoB_forConvert(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoB_forConvert(asfreq_TtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } - -static int64_t asfreq_TtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return (fromDate - 1)/60 + 1; } -static int64_t asfreq_TtoS(int64_t fromDate, char relation, asfreq_info *af_info) { - if (relation == 'S') { return fromDate*60 - 59; } - else { return fromDate*60; }} +static npy_int64 asfreq_TtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return (ordinal - 1)/(60*24) + HIGHFREQ_ORIG; } + +static npy_int64 asfreq_TtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoA(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_TtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoQ(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_TtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoM(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_TtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_TtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoB(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } + +static npy_int64 asfreq_TtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoB_forConvert(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } + +static npy_int64 asfreq_TtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return (ordinal - 1)/60 + 1; } +static npy_int64 asfreq_TtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) { + if (relation == 'S') { return ordinal*60 - 59; } + else { return ordinal*60; }} //************ FROM HOURLY *************** -static int64_t asfreq_HtoD(int64_t fromDate, char relation, asfreq_info *af_info) - { return (fromDate - 1)/24 + HIGHFREQ_ORIG; } -static int64_t asfreq_HtoA(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoA(asfreq_HtoD(fromDate, relation, &NULL_AF_INFO), 
relation, af_info); } -static int64_t asfreq_HtoQ(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoQ(asfreq_HtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_HtoM(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoM(asfreq_HtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_HtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_HtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_HtoB(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoB(asfreq_HtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } - -static int64_t asfreq_HtoB_forConvert(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoB_forConvert(asfreq_HtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_HtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return (ordinal - 1)/24 + HIGHFREQ_ORIG; } +static npy_int64 asfreq_HtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoA(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_HtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoQ(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_HtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoM(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_HtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_HtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoB(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } + +static npy_int64 asfreq_HtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoB_forConvert(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } // calculation works out the same as TtoS, so we just call that function for HtoT -static int64_t asfreq_HtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_TtoS(fromDate, relation, &NULL_AF_INFO); } -static int64_t asfreq_HtoS(int64_t fromDate, char relation, asfreq_info *af_info) { - if (relation == 'S') { return fromDate*60*60 - 60*60 + 1; } - else { return fromDate*60*60; }} +static npy_int64 asfreq_HtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_TtoS(ordinal, relation, &NULL_AF_INFO); } +static npy_int64 asfreq_HtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) { + if (relation == 'S') { return ordinal*60*60 - 60*60 + 1; } + else { return ordinal*60*60; }} //************ FROM BUSINESS *************** -static int64_t asfreq_BtoD(int64_t fromDate, char relation, asfreq_info *af_info) - { return ((fromDate-1)/5)*7 + (fromDate-1)%5 + 1; } +static npy_int64 asfreq_BtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) + { + return ((ordinal-1)/5)*7 + (ordinal-1)%5 + 1- ORD_OFFSET; + } -static int64_t asfreq_BtoA(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoA(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_BtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return 
asfreq_DtoA(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_BtoQ(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoQ(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_BtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoQ(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_BtoM(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoM(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_BtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoM(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_BtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_BtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_BtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoH(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_BtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoH(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_BtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoT(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_BtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoT(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_BtoS(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoS(asfreq_BtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_BtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoS(asfreq_BtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } //************ FROM WEEKLY *************** -static int64_t asfreq_WtoD(int64_t fromDate, char relation, asfreq_info *af_info) { - if (relation == 'S') { return fromDate * 7 - 6 + af_info->from_week_end;} - else { return fromDate * 7 + af_info->from_week_end; } +static npy_int64 asfreq_WtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { + if (relation == 'S') { + return ordinal * 7 - 6 + af_info->from_week_end - ORD_OFFSET; + } + else { + return ordinal * 7 + af_info->from_week_end - ORD_OFFSET; + } } -static int64_t asfreq_WtoA(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoA(asfreq_WtoD(fromDate, 'E', af_info), relation, af_info); } -static int64_t asfreq_WtoQ(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoQ(asfreq_WtoD(fromDate, 'E', af_info), relation, af_info); } -static int64_t asfreq_WtoM(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoM(asfreq_WtoD(fromDate, 'E', af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_WtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return asfreq_DtoA(asfreq_WtoD(ordinal, 'E', af_info), relation, af_info); } +static npy_int64 asfreq_WtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return 
asfreq_DtoQ(asfreq_WtoD(ordinal, 'E', af_info), relation, af_info); } +static npy_int64 asfreq_WtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return asfreq_DtoM(asfreq_WtoD(ordinal, 'E', af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_WtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_WtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_WtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_WtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_WtoB(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, asfreq_WtoD(fromDate, relation, af_info), + if (dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_WtoD(ordinal, relation, af_info) + ORD_OFFSET, GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (relation == 'S') { return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); } else { return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); } } -static int64_t asfreq_WtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoH(asfreq_WtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_WtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoT(asfreq_WtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_WtoS(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoS(asfreq_WtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_WtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoH(asfreq_WtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_WtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoT(asfreq_WtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_WtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoS(asfreq_WtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } //************ FROM MONTHLY *************** -static void MtoD_ym(int64_t fromDate, int64_t *y, int64_t *m) { - *y = (fromDate - 1) / 12 + 1; - *m = fromDate - 12 * (*y) - 1; +static int mod_compat(int x, int m) { + int result = x % m; + if (result < 0) return result + m; + return result; +} + +static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { + *y = ordinal / 12 + BASE_YEAR; + *m = mod_compat(ordinal + 1, 12); } -static int64_t asfreq_MtoD(int64_t fromDate, char relation, asfreq_info *af_info) { - int64_t y, m, absdate; +static npy_int64 asfreq_MtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { + + npy_int64 absdate; + int y, m; if (relation == 'S') { - MtoD_ym(fromDate, &y, &m); + MtoD_ym(ordinal, &y, &m); if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) return INT_ERR_CODE; - return absdate; + return absdate - ORD_OFFSET; } else { - MtoD_ym(fromDate+1, &y, &m); + MtoD_ym(ordinal+1, &y, &m); if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) return INT_ERR_CODE; - return absdate-1; + return absdate - 1 - ORD_OFFSET; } } -static int64_t asfreq_MtoA(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoA(asfreq_MtoD(fromDate, 'E', &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_MtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) 
{ + return asfreq_DtoA(asfreq_MtoD(ordinal, 'E', &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_MtoQ(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoQ(asfreq_MtoD(fromDate, 'E', &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_MtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return asfreq_DtoQ(asfreq_MtoD(ordinal, 'E', &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_MtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_MtoD(fromDate, relation, &NULL_AF_INFO), relation, af_info); } +static npy_int64 asfreq_MtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_MtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } -static int64_t asfreq_MtoB(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_MtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, asfreq_MtoD(fromDate, relation, &NULL_AF_INFO), - GREGORIAN_CALENDAR)) return INT_ERR_CODE; + if (dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_MtoD(ordinal, relation, &NULL_AF_INFO) + ORD_OFFSET, + GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (relation == 'S') { return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); } else { return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); } } -static int64_t asfreq_MtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoH(asfreq_MtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_MtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoT(asfreq_MtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static int64_t asfreq_MtoS(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoS(asfreq_MtoD(fromDate, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_MtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoH(asfreq_MtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_MtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoT(asfreq_MtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_MtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoS(asfreq_MtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } //************ FROM QUARTERLY *************** -static void QtoD_ym(int64_t fromDate, int64_t *y, int64_t *m, asfreq_info *af_info) { - - *y = (fromDate - 1) / 4 + 1; - *m = (fromDate + 4) * 3 - 12 * (*y) - 2; +static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { + *y = ordinal / 4 + BASE_YEAR; + *m = (ordinal % 4) * 3 + 1; if (af_info->from_q_year_end != 12) { *m += af_info->from_q_year_end; @@ -573,106 +594,117 @@ static void QtoD_ym(int64_t fromDate, int64_t *y, int64_t *m, asfreq_info *af_in } } -static int64_t asfreq_QtoD(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_QtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { - int64_t y, m, absdate; + npy_int64 absdate; + int y, m; if (relation == 'S') { - QtoD_ym(fromDate, &y, &m, af_info); + QtoD_ym(ordinal, &y, &m, af_info); + // printf("ordinal: %d, year: %d, month: %d\n", (int) ordinal, y, m); if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) return INT_ERR_CODE; - return absdate; + 
return absdate - ORD_OFFSET; } else { - QtoD_ym(fromDate+1, &y, &m, af_info); + QtoD_ym(ordinal+1, &y, &m, af_info); + // printf("ordinal: %d, year: %d, month: %d\n", (int) ordinal, y, m); if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) return INT_ERR_CODE; - return absdate - 1; + return absdate - 1 - ORD_OFFSET; } } -static int64_t asfreq_QtoQ(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoQ(asfreq_QtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_QtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoQ(asfreq_QtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_QtoA(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoA(asfreq_QtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_QtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return asfreq_DtoA(asfreq_QtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_QtoM(int64_t fromDate, char relation, asfreq_info *af_info) { - return asfreq_DtoM(asfreq_QtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_QtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return asfreq_DtoM(asfreq_QtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_QtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_QtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_QtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_QtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_QtoB(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_QtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, asfreq_QtoD(fromDate, relation, af_info), - GREGORIAN_CALENDAR)) return INT_ERR_CODE; + if (dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_QtoD(ordinal, relation, af_info) + ORD_OFFSET, + GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (relation == 'S') { return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); } else { return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); } } -static int64_t asfreq_QtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoH(asfreq_QtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_QtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoT(asfreq_QtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_QtoS(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoS(asfreq_QtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_QtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoH(asfreq_QtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_QtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoT(asfreq_QtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_QtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoS(asfreq_QtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } //************ FROM ANNUAL *************** -static int64_t asfreq_AtoD(int64_t fromDate, char relation, asfreq_info *af_info) { - int64_t absdate, year, 
final_adj; +static npy_int64 asfreq_AtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { + npy_int64 absdate, final_adj; + int year; int month = (af_info->from_a_year_end) % 12; + // start from 1970 + ordinal += BASE_YEAR; + if (month == 0) { month = 1; } else { month += 1; } if (relation == 'S') { - if (af_info->from_a_year_end == 12) {year = fromDate;} - else {year = fromDate - 1;} + if (af_info->from_a_year_end == 12) {year = ordinal;} + else {year = ordinal - 1;} final_adj = 0; } else { - if (af_info->from_a_year_end == 12) {year = fromDate+1;} - else {year = fromDate;} + if (af_info->from_a_year_end == 12) {year = ordinal+1;} + else {year = ordinal;} final_adj = -1; } absdate = absdate_from_ymd(year, month, 1); - if (absdate == INT_ERR_CODE) return INT_ERR_CODE; - return absdate + final_adj; + if (absdate == INT_ERR_CODE) { + return INT_ERR_CODE; + } + return absdate + final_adj - ORD_OFFSET; } -static int64_t asfreq_AtoA(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoA(asfreq_AtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_AtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoA(asfreq_AtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_AtoQ(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoQ(asfreq_AtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_AtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoQ(asfreq_AtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_AtoM(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoM(asfreq_AtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_AtoM(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoM(asfreq_AtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_AtoW(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoW(asfreq_AtoD(fromDate, relation, af_info), relation, af_info); } +static npy_int64 asfreq_AtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoW(asfreq_AtoD(ordinal, relation, af_info), relation, af_info); } -static int64_t asfreq_AtoB(int64_t fromDate, char relation, asfreq_info *af_info) { +static npy_int64 asfreq_AtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, asfreq_AtoD(fromDate, relation, af_info), + if (dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_AtoD(ordinal, relation, af_info) + ORD_OFFSET, GREGORIAN_CALENDAR)) return INT_ERR_CODE; if (relation == 'S') { return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); } else { return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); } } -static int64_t asfreq_AtoH(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoH(asfreq_AtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_AtoT(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoT(asfreq_AtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t asfreq_AtoS(int64_t fromDate, char relation, asfreq_info *af_info) - { return asfreq_DtoS(asfreq_AtoD(fromDate, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_AtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoH(asfreq_AtoD(ordinal, relation, af_info), 
relation, &NULL_AF_INFO); } +static npy_int64 asfreq_AtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoT(asfreq_AtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } +static npy_int64 asfreq_AtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) + { return asfreq_DtoS(asfreq_AtoD(ordinal, relation, af_info), relation, &NULL_AF_INFO); } -static int64_t nofunc(int64_t fromDate, char relation, asfreq_info *af_info) { return INT_ERR_CODE; } -static int64_t no_op(int64_t fromDate, char relation, asfreq_info *af_info) { return fromDate; } +static npy_int64 nofunc(npy_int64 ordinal, char relation, asfreq_info *af_info) { return INT_ERR_CODE; } +static npy_int64 no_op(npy_int64 ordinal, char relation, asfreq_info *af_info) { return ordinal; } // end of frequency specific conversion routines @@ -875,9 +907,9 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) } } -double getAbsTime(int freq, int64_t dailyDate, int64_t originalDate) { +double getAbsTime(int freq, npy_int64 dailyDate, npy_int64 originalDate) { - int64_t startOfDay, periodsPerDay; + npy_int64 startOfDay, periodsPerDay; switch(freq) { @@ -894,7 +926,8 @@ double getAbsTime(int freq, int64_t dailyDate, int64_t originalDate) { return 0; // 24*60*60 - 1; } - startOfDay = asfreq_DtoHIGHFREQ(dailyDate, 'S', periodsPerDay); + startOfDay = asfreq_DtoHIGHFREQ(dailyDate- ORD_OFFSET, 'S', + periodsPerDay); return (24*60*60)*((double)(originalDate - startOfDay))/((double)periodsPerDay); } @@ -926,7 +959,7 @@ int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, indicate the calendar to be used. */ static int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, - int64_t absdate, + npy_int64 absdate, double abstime, int calendar) { @@ -957,9 +990,9 @@ int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, * New pandas API-helper code, to expose to cython * ------------------------------------------------------------------*/ -int64_t asfreq(int64_t period_ordinal, int freq1, int freq2, char relation) +npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation) { - int64_t val; + npy_int64 val; freq_conv_func func; asfreq_info finfo; @@ -977,27 +1010,28 @@ int64_t asfreq(int64_t period_ordinal, int freq1, int freq2, char relation) return INT_ERR_CODE; } + /* generate an ordinal in period space */ -int64_t get_period_ordinal(int year, int month, int day, +npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, int second, int freq) { - int64_t absdays, delta; - int64_t weeks, days; - int64_t adj_ordinal, ordinal, day_adj; + npy_int64 absdays, delta; + npy_int64 weeks, days; + npy_int64 adj_ordinal, ordinal, day_adj; int freq_group, fmonth, mdiff, quarter; freq_group = get_freq_group(freq); if (freq == FR_SEC) { absdays = absdate_from_ymd(year, month, day); - delta = (absdays - HIGHFREQ_ORIG); - return (int64_t)(delta*86400 + hour*3600 + minute*60 + second + 1); + delta = (absdays - ORD_OFFSET - HIGHFREQ_ORIG); + return (npy_int64)(delta*86400 + hour*3600 + minute*60 + second + 1); } if (freq == FR_MIN) { absdays = absdate_from_ymd(year, month, day); - delta = (absdays - HIGHFREQ_ORIG); - return (int64_t)(delta*1440 + hour*60 + minute + 1); + delta = (absdays - ORD_OFFSET - HIGHFREQ_ORIG); + return (npy_int64)(delta*1440 + hour*60 + minute + 1); } if (freq == FR_HR) { @@ -1005,18 +1039,18 @@ int64_t get_period_ordinal(int year, int month, int day, { goto onError; } - delta = (absdays - HIGHFREQ_ORIG); - return (int64_t)(delta*24 + hour 
+ 1); + delta = (absdays - ORD_OFFSET - HIGHFREQ_ORIG); + return (npy_int64)(delta*24 + hour + 1); } if (freq == FR_DAY) { - return (int64_t)absdate_from_ymd(year, month, day); + return (npy_int64) (absdate_from_ymd(year, month, day) - ORD_OFFSET); } if (freq == FR_UND) { - return (int64_t)absdate_from_ymd(year, month, day); + return (npy_int64) (absdate_from_ymd(year, month, day) - ORD_OFFSET); } if (freq == FR_BUS) @@ -1025,13 +1059,13 @@ int64_t get_period_ordinal(int year, int month, int day, { goto onError; } - weeks = days/7; - return (int64_t)(days - weeks*2); + weeks = days / 7; + return (npy_int64)(days - weeks*2); } if (freq_group == FR_WK) { - if((ordinal = (int64_t)absdate_from_ymd(year, month, day)) == INT_ERR_CODE) + if((ordinal = (npy_int64)absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { goto onError; } @@ -1042,7 +1076,7 @@ int64_t get_period_ordinal(int year, int month, int day, if (freq == FR_MTH) { - return (year-1)*12 + month; + return (year - BASE_YEAR) * 12 + month - 1; } if (freq_group == FR_QTR) @@ -1054,7 +1088,7 @@ int64_t get_period_ordinal(int year, int month, int day, if (mdiff < 0) mdiff += 12; if (month >= fmonth) mdiff += 12; - return 1 + (year - 1) * 4 + (mdiff - 1) / 3; + return (year - BASE_YEAR) * 4 + (mdiff - 1) / 3; } if (freq_group == FR_ANN) @@ -1062,10 +1096,10 @@ int64_t get_period_ordinal(int year, int month, int day, fmonth = freq - FR_ANN; if (fmonth == 0) fmonth = 12; if (month <= fmonth) { - return year; + return year - BASE_YEAR; } else { - return year + 1; + return year - BASE_YEAR + 1; } } @@ -1082,17 +1116,17 @@ int64_t get_period_ordinal(int year, int month, int day, is calculated for the last day of the period. */ -int64_t get_python_ordinal(int64_t period_ordinal, int freq) +npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { asfreq_info af_info; - int64_t (*toDaily)(int64_t, char, asfreq_info*); + npy_int64 (*toDaily)(npy_int64, char, asfreq_info*); if (freq == FR_DAY) - return period_ordinal; + return period_ordinal + ORD_OFFSET; toDaily = get_asfreq_func(freq, FR_DAY, 0); get_asfreq_info(freq, FR_DAY, &af_info); - return toDaily(period_ordinal, 'E', &af_info); + return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET; } char *str_replace(const char *s, const char *old, const char *new) { @@ -1129,7 +1163,7 @@ char *str_replace(const char *s, const char *old, const char *new) { // function to generate a nice string representation of the period // object, originally from DateObject_strftime -char *skts_strftime(int64_t value, int freq, PyObject *args) +char *skts_strftime(npy_int64 ordinal, int freq, PyObject *args) { char *orig_fmt_str, *fmt_str; char *result; @@ -1144,12 +1178,12 @@ char *skts_strftime(int64_t value, int freq, PyObject *args) int extra_fmts_found_one = 0; struct tm c_date; struct date_info tempDate; - int64_t absdate; + npy_int64 absdate, daily_ord; double abstime; int i, result_len; PyObject *py_result; - int64_t (*toDaily)(int64_t, char, asfreq_info*) = NULL; + npy_int64 (*toDaily)(npy_int64, char, asfreq_info*) = NULL; asfreq_info af_info; if (!PyArg_ParseTuple(args, "s:strftime(fmt)", &orig_fmt_str)) @@ -1158,10 +1192,12 @@ char *skts_strftime(int64_t value, int freq, PyObject *args) toDaily = get_asfreq_func(freq, FR_DAY, 0); get_asfreq_info(freq, FR_DAY, &af_info); - absdate = toDaily(value, 'E', &af_info); - abstime = getAbsTime(freq, absdate, value); + daily_ord = toDaily(ordinal, 'E', &af_info); + abstime = getAbsTime(freq, daily_ord + ORD_OFFSET, ordinal); + + // printf("daily_ord: 
%d\n", (int) daily_ord); - if(dInfoCalc_SetFromAbsDateTime(&tempDate, absdate, abstime, + if(dInfoCalc_SetFromAbsDateTime(&tempDate, daily_ord + ORD_OFFSET, abstime, GREGORIAN_CALENDAR)) return NULL; // populate standard C date struct with info from our date_info struct @@ -1221,7 +1257,7 @@ char *skts_strftime(int64_t value, int freq, PyObject *args) } else { qtr_freq = FR_QTR; } get_asfreq_info(FR_DAY, qtr_freq, &af_info); - if(DtoQ_yq(absdate, &af_info, &year, &quarter) == INT_ERR_CODE) + if(DtoQ_yq(daily_ord, &af_info, &year, &quarter) == INT_ERR_CODE) { return NULL; } if(strcmp(extra_fmts[i][0], "%q") == 0) { @@ -1263,7 +1299,7 @@ char *skts_strftime(int64_t value, int freq, PyObject *args) return result; } -char *period_to_string(int64_t value, int freq) +char *period_to_string(npy_int64 value, int freq) { int freq_group = get_freq_group(freq); PyObject *string_arg; @@ -1275,7 +1311,7 @@ char *period_to_string(int64_t value, int freq) if ((retval = PyArray_malloc(digits * sizeof(char))) == NULL) { return (char *)PyErr_NoMemory(); } - sprintf(retval, "%ld", value); + sprintf(retval, "%ld", (long int) value); return retval; } else if (freq_group == FR_ANN) { string_arg = Py_BuildValue("(s)", "%Y"); } @@ -1296,7 +1332,7 @@ char *period_to_string(int64_t value, int freq) return retval; } -char *period_to_string2(int64_t value, int freq, char *fmt) +char *period_to_string2(npy_int64 value, int freq, char *fmt) { PyObject *string_arg; char *retval; @@ -1307,7 +1343,7 @@ char *period_to_string2(int64_t value, int freq, char *fmt) return retval; } -static int _quarter_year(int64_t ordinal, int freq, int *year, int *quarter) { +static int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { asfreq_info af_info; int qtr_freq; @@ -1355,94 +1391,95 @@ static int _ISOWeek(struct date_info *dinfo) return week; } -int get_date_info(int64_t ordinal, int freq, struct date_info *dinfo) +int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { - int64_t absdate = get_python_ordinal(ordinal, freq); + npy_int64 absdate = get_python_ordinal(ordinal, freq); double abstime = getAbsTime(freq, absdate, ordinal); - if(dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, GREGORIAN_CALENDAR)) + if(dInfoCalc_SetFromAbsDateTime(dinfo, absdate, + abstime, GREGORIAN_CALENDAR)) return INT_ERR_CODE; return 0; } -int pyear(int64_t ordinal, int freq) { +int pyear(npy_int64 ordinal, int freq) { struct date_info dinfo; get_date_info(ordinal, freq, &dinfo); return dinfo.year; } -int pqyear(int64_t ordinal, int freq) { +int pqyear(npy_int64 ordinal, int freq) { int year, quarter; if( _quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) return INT_ERR_CODE; return year; } -int pquarter(int64_t ordinal, int freq) { +int pquarter(npy_int64 ordinal, int freq) { int year, quarter; if(_quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) return INT_ERR_CODE; return quarter; } -int pmonth(int64_t ordinal, int freq) { +int pmonth(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return dinfo.month; } -int pday(int64_t ordinal, int freq) { +int pday(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return dinfo.day; } -int pweekday(int64_t ordinal, int freq) { +int pweekday(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return 
dinfo.day_of_week; } -int pday_of_week(int64_t ordinal, int freq) { +int pday_of_week(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return dinfo.day_of_week; } -int pday_of_year(int64_t ordinal, int freq) { +int pday_of_year(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return dinfo.day_of_year; } -int pweek(int64_t ordinal, int freq) { +int pweek(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return _ISOWeek(&dinfo); } -int phour(int64_t ordinal, int freq) { +int phour(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return dinfo.hour; } -int pminute(int64_t ordinal, int freq) { +int pminute(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; return dinfo.minute; } -int psecond(int64_t ordinal, int freq) { +int psecond(npy_int64 ordinal, int freq) { struct date_info dinfo; if(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; diff --git a/pandas/src/period.h b/pandas/src/period.h index f1e4f476ec924..e3bc190dd9b8e 100644 --- a/pandas/src/period.h +++ b/pandas/src/period.h @@ -10,6 +10,7 @@ #include #include "numpy/ndarraytypes.h" #include "stdint.h" +#include "limits.h" /* * declarations from period here @@ -28,13 +29,13 @@ // HIGHFREQ_ORIG is the datetime ordinal from which to begin the second // frequency ordinal sequence -// begins second ordinal at 1/1/1AD gregorian proleptic calendar -#define HIGHFREQ_ORIG 1 - // typedef int64_t npy_int64; - // begins second ordinal at 1/1/1970 unix epoch -// #define HIGHFREQ_ORIG 719163 + +// #define HIGHFREQ_ORIG 62135683200LL +#define BASE_YEAR 1970 +#define ORD_OFFSET 719163LL // days until 1970-01-01 +#define HIGHFREQ_ORIG 0 // ORD_OFFSET * 86400LL // days until 1970-01-01 #define FR_ANN 1000 /* Annual */ #define FR_ANNDEC FR_ANN /* Annual - December year end*/ @@ -103,7 +104,7 @@ typedef struct asfreq_info { typedef struct date_info { - int64_t absdate; + npy_int64 absdate; double abstime; double second; @@ -118,40 +119,40 @@ typedef struct date_info { int calendar; } date_info; -typedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*); +typedef npy_int64 (*freq_conv_func)(npy_int64, char, asfreq_info*); /* * new pandas API helper functions here */ -int64_t asfreq(int64_t period_ordinal, int freq1, int freq2, char relation); +npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation); -int64_t get_period_ordinal(int year, int month, int day, +npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, int second, int freq); -int64_t get_python_ordinal(int64_t period_ordinal, int freq); +npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq); -char *skts_strftime(int64_t value, int freq, PyObject *args); -char *period_to_string(int64_t value, int freq); -char *period_to_string2(int64_t value, int freq, char *fmt); +char *skts_strftime(npy_int64 value, int freq, PyObject *args); +char *period_to_string(npy_int64 value, int freq); +char *period_to_string2(npy_int64 value, int freq, char *fmt); -int get_date_info(int64_t ordinal, int freq, struct date_info *dinfo); +int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); 
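
The prototypes above, together with BASE_YEAR and ORD_OFFSET, re-base every period ordinal on the Unix epoch: ORD_OFFSET (719163) is the number of proleptic Gregorian days between 0001-01-01 and 1970-01-01, so a daily ordinal of 0 now means 1970-01-01. A minimal Python sketch of that convention, assuming only those constants; the helper names below are illustrative and not part of the patch:

    from datetime import date

    ORD_OFFSET = 719163   # days from 0001-01-01 to 1970-01-01, as in period.h
    assert date(1970, 1, 1).toordinal() == ORD_OFFSET

    def daily_ordinal(year, month, day):
        # FR_DAY ordinals count days since 1970-01-01 (0 for the epoch itself)
        return date(year, month, day).toordinal() - ORD_OFFSET

    def monthly_ordinal(year, month):
        # FR_MTH ordinals count months since January 1970
        return (year - 1970) * 12 + month - 1

    assert daily_ordinal(1970, 1, 1) == 0    # matches _check_freq('D', '1970-01-01')
    assert monthly_ordinal(1970, 1) == 0     # matches _check_freq('M', '1970-01')
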
freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert); void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info); -int pyear(int64_t ordinal, int freq); -int pqyear(int64_t ordinal, int freq); -int pquarter(int64_t ordinal, int freq); -int pmonth(int64_t ordinal, int freq); -int pday(int64_t ordinal, int freq); -int pweekday(int64_t ordinal, int freq); -int pday_of_week(int64_t ordinal, int freq); -int pday_of_year(int64_t ordinal, int freq); -int pweek(int64_t ordinal, int freq); -int phour(int64_t ordinal, int freq); -int pminute(int64_t ordinal, int freq); -int psecond(int64_t ordinal, int freq); -double getAbsTime(int freq, int64_t dailyDate, int64_t originalDate); +int pyear(npy_int64 ordinal, int freq); +int pqyear(npy_int64 ordinal, int freq); +int pquarter(npy_int64 ordinal, int freq); +int pmonth(npy_int64 ordinal, int freq); +int pday(npy_int64 ordinal, int freq); +int pweekday(npy_int64 ordinal, int freq); +int pday_of_week(npy_int64 ordinal, int freq); +int pday_of_year(npy_int64 ordinal, int freq); +int pweek(npy_int64 ordinal, int freq); +int phour(npy_int64 ordinal, int freq); +int pminute(npy_int64 ordinal, int freq); +int psecond(npy_int64 ordinal, int freq); +double getAbsTime(int freq, npy_int64 dailyDate, npy_int64 originalDate); #endif diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 8f8ce424d07ed..65bc784fdbf0e 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -671,7 +671,6 @@ include "skiplist.pyx" include "groupby.pyx" include "moments.pyx" include "reindex.pyx" -include "generated.pyx" include "reduce.pyx" include "stats.pyx" include "properties.pyx" diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 96dc5f1c223d7..8a77cde766a26 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -14,6 +14,7 @@ from pandas._tseries import Timestamp import pandas._tseries as lib +import pandas._algos as _algos def _utc(): import pytz @@ -144,13 +145,13 @@ class DatetimeIndex(Int64Index): """ _join_precedence = 10 - _inner_indexer = _join_i8_wrapper(lib.inner_join_indexer_int64) - _outer_indexer = _join_i8_wrapper(lib.outer_join_indexer_int64) - _left_indexer = _join_i8_wrapper(lib.left_join_indexer_int64, + _inner_indexer = _join_i8_wrapper(_algos.inner_join_indexer_int64) + _outer_indexer = _join_i8_wrapper(_algos.outer_join_indexer_int64) + _left_indexer = _join_i8_wrapper(_algos.left_join_indexer_int64, with_indexers=False) _groupby = lib.groupby_arrays # _wrap_i8_function(lib.groupby_int64) - _arrmap = _wrap_dt_function(lib.arrmap_object) + _arrmap = _wrap_dt_function(_algos.arrmap_object) __eq__ = _dt_index_cmp('__eq__') __ne__ = _dt_index_cmp('__ne__') diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index f443ab6d99924..f6f9f3c6c31a3 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -25,8 +25,7 @@ def _period_field_accessor(name, alias=None): alias = name def f(self): base, mult = _gfc(self.freq) - g = getattr(lib, 'get_period_%s' % alias) - return g(self.ordinal, base, mult) + return lib.get_period_field(alias, self.ordinal, base, mult) f.__name__ = name return property(f) @@ -35,8 +34,7 @@ def _field_accessor(name, alias=None): alias = name def f(self): base, mult = _gfc(self.freq) - g = getattr(lib, 'get_period_%s_arr' % alias) - return g(self.values, base, mult) + return lib.get_period_field_arr(alias, self.values, base, mult) f.__name__ = name return property(f) @@ -99,8 +97,6 @@ def __init__(self, value=None, freq=None, 
ordinal=None, elif ordinal is not None: if not com.is_integer(ordinal): raise ValueError("Ordinal must be an integer") - if ordinal <= 0: - raise ValueError("Ordinal must be positive") if freq is None: raise ValueError('Must supply freq for ordinal value') self.ordinal = ordinal @@ -259,19 +255,19 @@ def to_timestamp(self, freq=None, how='S'): ts_freq = _period_rule_to_timestamp_rule(new_val.freq, how=how) return Timestamp(dt64, offset=to_offset(ts_freq)) - year = _period_field_accessor('year') - month = _period_field_accessor('month') - day = _period_field_accessor('day') - hour = _period_field_accessor('hour') - minute = _period_field_accessor('minute') - second = _period_field_accessor('second') - weekofyear = _period_field_accessor('week') + year = _period_field_accessor('year', 0) + month = _period_field_accessor('month', 3) + day = _period_field_accessor('day', 4) + hour = _period_field_accessor('hour', 5) + minute = _period_field_accessor('minute', 6) + second = _period_field_accessor('second', 7) + weekofyear = _period_field_accessor('week', 8) week = weekofyear - dayofweek = _period_field_accessor('dayofweek', 'dow') + dayofweek = _period_field_accessor('dayofweek', 10) weekday = dayofweek - dayofyear = day_of_year = _period_field_accessor('dayofyear', 'doy') - quarter = _period_field_accessor('quarter') - qyear = _period_field_accessor('qyear') + dayofyear = day_of_year = _period_field_accessor('dayofyear', 9) + quarter = _period_field_accessor('quarter', 2) + qyear = _period_field_accessor('qyear', 1) @classmethod def now(cls, freq=None): @@ -650,19 +646,19 @@ def asfreq(self, freq=None, how='E'): result.freq = freq return result - year = _field_accessor('year') - month = _field_accessor('month') - day = _field_accessor('day') - hour = _field_accessor('hour') - minute = _field_accessor('minute') - second = _field_accessor('second') - weekofyear = _field_accessor('week') + year = _field_accessor('year', 0) + month = _field_accessor('month', 3) + day = _field_accessor('day', 4) + hour = _field_accessor('hour', 5) + minute = _field_accessor('minute', 6) + second = _field_accessor('second', 7) + weekofyear = _field_accessor('week', 8) week = weekofyear - dayofweek = _field_accessor('dayofweek', 'dow') + dayofweek = _field_accessor('dayofweek', 10) weekday = dayofweek - dayofyear = day_of_year = _field_accessor('dayofyear', 'doy') - quarter = _field_accessor('quarter') - qyear = _field_accessor('qyear') + dayofyear = day_of_year = _field_accessor('dayofyear', 9) + quarter = _field_accessor('quarter', 2) + qyear = _field_accessor('qyear', 1) # Try to run function on index first, and then on elements of index # Especially important for group-by functionality diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 771d6387c127a..695faa52d379e 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -8,6 +8,7 @@ from unittest import TestCase from datetime import datetime, timedelta +import unittest from numpy.ma.testutils import assert_equal @@ -1478,8 +1479,34 @@ def test_add(self): self.assertRaises(ValueError, dt1.__add__, dt2) -############################################################################### -#------------------------------------------------------------------------------ +class TestPeriodRepresentation(unittest.TestCase): + """ + Wish to match NumPy units + """ + + def test_annual(self): + self._check_freq('A', 1970) + + def test_monthly(self): + self._check_freq('M', '1970-01') + + def 
test_daily(self): + self._check_freq('D', '1970-01-01') + + def test_hourly(self): + self._check_freq('D', '1970-01-01') + + def test_minutely(self): + self._check_freq('H', '1970-01-01 00:00:00') + + def test_secondly(self): + self._check_freq('T', '1970-01-01 00:00:00') + + def _check_freq(self, freq, base_date): + rng = PeriodIndex(start=base_date, periods=10, freq=freq) + exp = np.arange(10, dtype=np.int64) + self.assert_(np.array_equal(rng.values, exp)) + if __name__ == '__main__': import nose diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 4eff342e58c6e..2628386668082 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1327,6 +1327,7 @@ def test_catch_infinite_loop(self): datetime(2011,11,12), freq=offset) + if __name__ == '__main__': nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], exit=False) diff --git a/setup.py b/setup.py index 579a9e0102564..b2dfd734dae29 100755 --- a/setup.py +++ b/setup.py @@ -337,7 +337,7 @@ def run(self): cmdclass['sdist'] = CheckSDist tseries_depends = ['reindex', 'groupby', 'skiplist', 'moments', - 'generated', 'reduce', 'stats', 'datetime', + 'reduce', 'stats', 'datetime', 'hashtable', 'inference', 'properties', 'join', 'engines'] def srcpath(name=None, suffix='.pyx', subdir='src'): @@ -350,6 +350,11 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): else: tseries_depends = [] +algos_ext = Extension('pandas._algos', + sources=[srcpath('generated', suffix=suffix)], + include_dirs=[np.get_include()], + ) + tseries_ext = Extension('pandas._tseries', depends=tseries_depends + ['pandas/src/numpy_helper.h'], sources=[srcpath('tseries', suffix=suffix), @@ -387,7 +392,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): sources=[srcpath('cppsandbox', suffix=suffix)], include_dirs=[np.get_include()]) -extensions = [tseries_ext, sparse_ext, ujson_ext] +extensions = [algos_ext, tseries_ext, sparse_ext, ujson_ext] if not ISRELEASED: extensions.extend([sandbox_ext]) From 11f2c0df3ac6e24ea3c8717c5252d8738b389597 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 10 May 2012 11:16:07 -0400 Subject: [PATCH 003/114] REF: have got things mostly working for #1150 --- pandas/src/datetime.pyx | 104 +--------------------------- pandas/src/period.c | 5 +- pandas/tseries/period.py | 2 +- pandas/tseries/tests/test_period.py | 42 +++++------ 4 files changed, 26 insertions(+), 127 deletions(-) diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 823439b71ffc1..36e1b4cbf2600 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -1391,8 +1391,7 @@ ctypedef int (*accessor)(int64_t ordinal, int freq) except -1 def get_period_field(int code, int64_t value, int freq, int64_t mult): cdef accessor f = _get_accessor_func(code) - value = remove_mult(value, mult) - return f(value, freq) + return f(remove_mult(value, mult), freq) def get_period_field_arr(int code, ndarray[int64_t] arr, int freq, int64_t mult): @@ -1412,31 +1411,6 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, return out -cdef int apply_accessor(accessor func, int64_t value, int freq, - int64_t mult) except -1: - value = remove_mult(value, mult) - return func(value, freq) - -# same but for arrays - -cdef ndarray[int64_t] apply_accessor_arr(accessor func, ndarray[int64_t] arr, - int freq, int64_t mult): - cdef: - Py_ssize_t i, sz - ndarray[int64_t] out - # accessor f - - # f = _get_accessor_func(code) - - sz = len(arr) - out = np.empty(sz, 
dtype=np.int64) - - for i in range(sz): - out[i] = remove_mult(arr[i], mult) - out[i] = func(out[i], freq) - - return out - cdef accessor _get_accessor_func(int code): if code == 0: @@ -1464,79 +1438,3 @@ cdef accessor _get_accessor_func(int code): else: raise ValueError('Unrecognized code: %s' % code) - -# def get_period_year_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pyear, arr, freq, mult) - -# def get_period_qyear_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pqyear, arr, freq, mult) - -# def get_period_quarter_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pquarter, arr, freq, mult) - -# def get_period_month_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pmonth, arr, freq, mult) - -# def get_period_day_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pday, arr, freq, mult) - -# def get_period_hour_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(phour, arr, freq, mult) - -# def get_period_minute_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pminute, arr, freq, mult) - -# def get_period_second_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(psecond, arr, freq, mult) - -# def get_period_dow_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pday_of_week, arr, freq, mult) - -# def get_period_week_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pweek, arr, freq, mult) - -# def get_period_weekday_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pweekday, arr, freq, mult) - -# def get_period_doy_arr(ndarray[int64_t] arr, int freq, int64_t mult): -# return apply_accessor_arr(pday_of_year, arr, freq, mult) - -# def get_abs_time(freq, dailyDate, originalDate): -# return getAbsTime(freq, dailyDate, originalDate) - - -# cpdef int get_period_year(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pyear, value, freq, mult) - -# cpdef int get_period_qyear(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pqyear, value, freq, mult) - -# cpdef int get_period_quarter(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pquarter, value, freq, mult) - -# cpdef int get_period_month(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pmonth, value, freq, mult) - -# cpdef int get_period_day(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pday, value, freq, mult) - -# cpdef int get_period_hour(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(phour, value, freq, mult) - -# cpdef int get_period_minute(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pminute, value, freq, mult) - -# cpdef int get_period_second(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(psecond, value, freq, mult) - -# cpdef int get_period_dow(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pday_of_week, value, freq, mult) - -# cpdef int get_period_week(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pweek, value, freq, mult) - -# cpdef int get_period_weekday(int64_t value, int freq, int64_t mult) except -1: -# return apply_accessor(pweekday, value, freq, mult) - -# cpdef int get_period_doy(int64_t value, int freq, int64_t mult) except -1: 
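
The commented-out block being deleted here kept one exported accessor per field (get_period_year, get_period_year_arr, and so on). The refactor replaces them with the single get_period_field / get_period_field_arr pair above, which dispatches on an integer code, the same codes period.py now passes (0 year, 1 qyear, 2 quarter, 3 month, 4 day, 5 hour, 6 minute, 7 second, 8 week, 9 dayofyear, 10 dayofweek). A rough pure-Python sketch of that dispatch, restricted to daily ordinals for brevity; names other than get_period_field are illustrative, and the real implementation stays in Cython and C:

    from datetime import date, timedelta

    _EPOCH = date(1970, 1, 1)

    def _from_daily_ordinal(ordinal):
        # daily ordinals count days since 1970-01-01 under the new convention
        return _EPOCH + timedelta(days=int(ordinal))

    _FIELDS = {
        0: lambda d: d.year,
        3: lambda d: d.month,
        4: lambda d: d.day,
        9: lambda d: d.timetuple().tm_yday,   # day of year
        10: lambda d: d.weekday(),            # day of week, Monday == 0
    }

    def get_period_field(code, ordinal, freq='D'):
        # one entry point keyed by integer code instead of a separate
        # get_period_<field> function per accessor
        if code not in _FIELDS:
            raise ValueError('Unrecognized code: %s' % code)
        return _FIELDS[code](_from_daily_ordinal(ordinal))

    # e.g. get_period_field(3, 0) -> 1, since ordinal 0 is January 1970
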
-# return apply_accessor(pday_of_year, value, freq, mult) diff --git a/pandas/src/period.c b/pandas/src/period.c index ee44720a51810..e086b108b6b97 100644 --- a/pandas/src/period.c +++ b/pandas/src/period.c @@ -534,7 +534,7 @@ static int mod_compat(int x, int m) { static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { *y = ordinal / 12 + BASE_YEAR; - *m = mod_compat(ordinal + 1, 12); + *m = mod_compat(ordinal, 12) + 1; } @@ -548,7 +548,7 @@ static npy_int64 asfreq_MtoD(npy_int64 ordinal, char relation, asfreq_info *af_i if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) return INT_ERR_CODE; return absdate - ORD_OFFSET; } else { - MtoD_ym(ordinal+1, &y, &m); + MtoD_ym(ordinal + 1, &y, &m); if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) return INT_ERR_CODE; return absdate - 1 - ORD_OFFSET; } @@ -1394,6 +1394,7 @@ static int _ISOWeek(struct date_info *dinfo) int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { npy_int64 absdate = get_python_ordinal(ordinal, freq); + /* printf("freq: %d, absdate: %d\n", freq, (int) absdate); */ double abstime = getAbsTime(freq, absdate, ordinal); if(dInfoCalc_SetFromAbsDateTime(dinfo, absdate, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index f6f9f3c6c31a3..c8f921ca9c6fd 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -744,7 +744,7 @@ def get_value(self, series, key): """ try: return super(PeriodIndex, self).get_value(series, key) - except KeyError: + except (KeyError, IndexError): try: asdt, parsed, reso = parse_time_string(key, self.freq) grp = _freq_mod._infer_period_group(reso) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 695faa52d379e..29a79a35576d2 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1304,27 +1304,27 @@ def test_to_period_quarterly(self): result = stamps.to_period(freq) self.assert_(rng.equals(result)) - def test_iindex_multiples(self): - ii = PeriodIndex(start='1/1/10', end='12/31/12', freq='2M') - self.assertEquals(ii[0], Period('1/1/10', '2M')) - self.assertEquals(ii[1], Period('3/1/10', '2M')) - - self.assertEquals(ii[0].asfreq('6M'), ii[2].asfreq('6M')) - self.assertEquals(ii[0].asfreq('A'), ii[2].asfreq('A')) - - self.assertEquals(ii[0].asfreq('M', how='S'), - Period('Jan 2010', '1M')) - self.assertEquals(ii[0].asfreq('M', how='E'), - Period('Feb 2010', '1M')) - self.assertEquals(ii[1].asfreq('M', how='S'), - Period('Mar 2010', '1M')) - - i = Period('1/1/2010 12:05:18', '5S') - self.assertEquals(i, Period('1/1/2010 12:05:15', '5S')) - - i = Period('1/1/2010 12:05:18', '5S') - self.assertEquals(i.asfreq('1S', how='E'), - Period('1/1/2010 12:05:19', '1S')) + # def test_iindex_multiples(self): + # ii = PeriodIndex(start='1/1/10', end='12/31/12', freq='2M') + # self.assertEquals(ii[0], Period('1/1/10', '2M')) + # self.assertEquals(ii[1], Period('3/1/10', '2M')) + + # self.assertEquals(ii[0].asfreq('6M'), ii[2].asfreq('6M')) + # self.assertEquals(ii[0].asfreq('A'), ii[2].asfreq('A')) + + # self.assertEquals(ii[0].asfreq('M', how='S'), + # Period('Jan 2010', '1M')) + # self.assertEquals(ii[0].asfreq('M', how='E'), + # Period('Feb 2010', '1M')) + # self.assertEquals(ii[1].asfreq('M', how='S'), + # Period('Mar 2010', '1M')) + + # i = Period('1/1/2010 12:05:18', '5S') + # self.assertEquals(i, Period('1/1/2010 12:05:15', '5S')) + + # i = Period('1/1/2010 12:05:18', '5S') + # self.assertEquals(i.asfreq('1S', how='E'), + # Period('1/1/2010 12:05:19', '1S')) def 
test_iteration(self): index = PeriodIndex(start='1/1/10', periods=4, freq='B') From e9dee697a28296b431cfe75a1287e167e320a5cf Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 10 May 2012 22:59:41 -0400 Subject: [PATCH 004/114] BUG: more bug fixes, have to fix intraday frequencies still --- pandas/src/period.c | 14 ++++++------- pandas/tseries/period.py | 32 ++++++++++++++++++++++++++++- pandas/tseries/tests/test_period.py | 8 ++++---- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pandas/src/period.c b/pandas/src/period.c index e086b108b6b97..53302bee340e1 100644 --- a/pandas/src/period.c +++ b/pandas/src/period.c @@ -1024,14 +1024,14 @@ npy_int64 get_period_ordinal(int year, int month, int day, if (freq == FR_SEC) { absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET - HIGHFREQ_ORIG); - return (npy_int64)(delta*86400 + hour*3600 + minute*60 + second + 1); + delta = (absdays - ORD_OFFSET + HIGHFREQ_ORIG); + return (npy_int64)(delta*86400 + hour*3600 + minute*60 + second); } if (freq == FR_MIN) { absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET - HIGHFREQ_ORIG); - return (npy_int64)(delta*1440 + hour*60 + minute + 1); + delta = (absdays - ORD_OFFSET + HIGHFREQ_ORIG); + return (npy_int64)(delta*1440 + hour*60 + minute); } if (freq == FR_HR) { @@ -1039,8 +1039,8 @@ npy_int64 get_period_ordinal(int year, int month, int day, { goto onError; } - delta = (absdays - ORD_OFFSET - HIGHFREQ_ORIG); - return (npy_int64)(delta*24 + hour + 1); + delta = (absdays - ORD_OFFSET + HIGHFREQ_ORIG); + return (npy_int64)(delta*24 + hour); } if (freq == FR_DAY) @@ -1347,7 +1347,7 @@ static int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { asfreq_info af_info; int qtr_freq; - ordinal = get_python_ordinal(ordinal, freq); + ordinal = get_python_ordinal(ordinal, freq) - ORD_OFFSET; if (get_freq_group(freq) == FR_QTR) qtr_freq = freq; diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index c8f921ca9c6fd..11dc22cf0ac18 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -10,7 +10,6 @@ import pandas.tseries.frequencies as _freq_mod import pandas.core.common as com -from pandas.util import py3compat from pandas._tseries import Timestamp import pandas._tseries as lib @@ -470,6 +469,30 @@ def dt64arr_to_periodarr(data, freq): # --- Period index sketch + +def _period_index_cmp(opname): + """ + Wrap comparison operations to convert datetime-like to datetime64 + """ + def wrapper(self, other): + if isinstance(other, Period): + func = getattr(self.values, opname) + assert(other.freq == self.freq) + result = func(other.ordinal) + elif isinstance(other, PeriodIndex): + assert(other.freq == self.freq) + return getattr(self.values, opname)(other.values) + else: + other = Period(other, freq=self.freq) + func = getattr(self.values, opname) + result = func(other.ordinal) + try: + return result.view(np.ndarray) + except: + return result + return wrapper + + class PeriodIndex(Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in @@ -507,6 +530,13 @@ class PeriodIndex(Int64Index): """ _box_scalars = True + __eq__ = _period_index_cmp('__eq__') + __ne__ = _period_index_cmp('__ne__') + __lt__ = _period_index_cmp('__lt__') + __gt__ = _period_index_cmp('__gt__') + __le__ = _period_index_cmp('__le__') + __ge__ = _period_index_cmp('__ge__') + def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, copy=False, name=None): diff --git 
a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 29a79a35576d2..22f715e9d51ac 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -145,7 +145,7 @@ def test_period_constructor(self): self.assertEqual(i1, expected) i1 = Period(ordinal=200701, freq='M') - self.assertEqual(i1.year, 16726) + self.assertEqual(i1.year, 18695) self.assertRaises(ValueError, Period, ordinal=200701) @@ -1494,13 +1494,13 @@ def test_daily(self): self._check_freq('D', '1970-01-01') def test_hourly(self): - self._check_freq('D', '1970-01-01') + self._check_freq('H', '1970-01-01') def test_minutely(self): - self._check_freq('H', '1970-01-01 00:00:00') + self._check_freq('T', '1970-01-01') def test_secondly(self): - self._check_freq('T', '1970-01-01 00:00:00') + self._check_freq('S', '1970-01-01') def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) From 69d0baaeaf8281dd8b59b3f72a35d0befd3a32a4 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 11 May 2012 13:24:01 -0400 Subject: [PATCH 005/114] BUG: more intraday unit fixes --- pandas/src/period.c | 42 ++++++++++++++++++------------------------ setup.py | 27 ++++++++++++++++++--------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/pandas/src/period.c b/pandas/src/period.c index 53302bee340e1..7689323e1802b 100644 --- a/pandas/src/period.c +++ b/pandas/src/period.c @@ -367,13 +367,13 @@ static npy_int64 asfreq_DtoB_forConvert(npy_int64 ordinal, char relation, asfreq // needed for getDateInfo function static npy_int64 asfreq_DtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { return ordinal; } -static npy_int64 asfreq_DtoHIGHFREQ(npy_int64 ordinal, char relation, npy_int64 periodsPerDay) { +static npy_int64 asfreq_DtoHIGHFREQ(npy_int64 ordinal, char relation, npy_int64 per_day) { if (ordinal >= HIGHFREQ_ORIG) { if (relation == 'S') { - return (ordinal - HIGHFREQ_ORIG)*(periodsPerDay) + 1; + return (ordinal - HIGHFREQ_ORIG) * per_day; } else { - return (ordinal - HIGHFREQ_ORIG + 1)*(periodsPerDay); + return (ordinal - HIGHFREQ_ORIG + 1) * per_day - 1; } } else { return INT_ERR_CODE; } } @@ -388,7 +388,7 @@ static npy_int64 asfreq_DtoS(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM SECONDLY *************** static npy_int64 asfreq_StoD(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal - 1)/(60*60*24) + HIGHFREQ_ORIG; } + { return (ordinal)/(60*60*24) + HIGHFREQ_ORIG; } static npy_int64 asfreq_StoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } @@ -410,7 +410,7 @@ static npy_int64 asfreq_StoH(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM MINUTELY *************** static npy_int64 asfreq_TtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal - 1)/(60*24) + HIGHFREQ_ORIG; } + { return (ordinal)/(60*24) + HIGHFREQ_ORIG; } static npy_int64 asfreq_TtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } @@ -435,7 +435,7 @@ static npy_int64 asfreq_TtoS(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM HOURLY *************** static npy_int64 asfreq_HtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal - 1)/24 + HIGHFREQ_ORIG; } + { return ordinal / 24 + HIGHFREQ_ORIG; } static npy_int64 
asfreq_HtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } static npy_int64 asfreq_HtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) @@ -907,28 +907,27 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) } } -double getAbsTime(int freq, npy_int64 dailyDate, npy_int64 originalDate) { +double get_abs_time(int freq, npy_int64 daily_ord, npy_int64 ordinal) { - npy_int64 startOfDay, periodsPerDay; + npy_int64 start_ord, per_day; switch(freq) { case FR_HR: - periodsPerDay = 24; + per_day = 24; break; case FR_MIN: - periodsPerDay = 24*60; + per_day = 24*60; break; case FR_SEC: - periodsPerDay = 24*60*60; + per_day = 24*60*60; break; default: return 0; // 24*60*60 - 1; } - startOfDay = asfreq_DtoHIGHFREQ(dailyDate- ORD_OFFSET, 'S', - periodsPerDay); - return (24*60*60)*((double)(originalDate - startOfDay))/((double)periodsPerDay); + start_ord = asfreq_DtoHIGHFREQ(daily_ord, 'S', per_day); + return (24*60*60)*((double) (ordinal - start_ord)) / ((double) per_day); } /* Sets the time part of the DateTime object. */ @@ -971,15 +970,10 @@ int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, abstime); /* Calculate the date */ - if (dInfoCalc_SetFromAbsDate(dinfo, - absdate, - calendar)) - goto onError; + if (dInfoCalc_SetFromAbsDate(dinfo, absdate, calendar)) goto onError; /* Calculate the time */ - if (dInfoCalc_SetFromAbsTime(dinfo, - abstime)) - goto onError; + if (dInfoCalc_SetFromAbsTime(dinfo, abstime)) goto onError; return 0; onError: @@ -1193,9 +1187,9 @@ char *skts_strftime(npy_int64 ordinal, int freq, PyObject *args) get_asfreq_info(freq, FR_DAY, &af_info); daily_ord = toDaily(ordinal, 'E', &af_info); - abstime = getAbsTime(freq, daily_ord + ORD_OFFSET, ordinal); + abstime = get_abs_time(freq, daily_ord, ordinal); - // printf("daily_ord: %d\n", (int) daily_ord); + printf("daily_ord: %d, abstime: %f \n", (int) daily_ord, abstime); if(dInfoCalc_SetFromAbsDateTime(&tempDate, daily_ord + ORD_OFFSET, abstime, GREGORIAN_CALENDAR)) return NULL; @@ -1395,7 +1389,7 @@ int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { npy_int64 absdate = get_python_ordinal(ordinal, freq); /* printf("freq: %d, absdate: %d\n", freq, (int) absdate); */ - double abstime = getAbsTime(freq, absdate, ordinal); + double abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal); if(dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, GREGORIAN_CALENDAR)) diff --git a/setup.py b/setup.py index b2dfd734dae29..761f86135f22c 100755 --- a/setup.py +++ b/setup.py @@ -356,15 +356,24 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): ) tseries_ext = Extension('pandas._tseries', - depends=tseries_depends + ['pandas/src/numpy_helper.h'], - sources=[srcpath('tseries', suffix=suffix), - 'pandas/src/period.c', - 'pandas/src/np_datetime.c', - 'pandas/src/np_datetime_strings.c'], - include_dirs=[np.get_include()], - # pyrex_gdb=True, - # extra_compile_args=['-Wconversion'] - ) + depends=tseries_depends + ['pandas/src/numpy_helper.h'], + sources=[srcpath('tseries', suffix=suffix), + 'pandas/src/period.c', + 'pandas/src/np_datetime.c', + 'pandas/src/np_datetime_strings.c'], + include_dirs=[np.get_include()], + # pyrex_gdb=True, + # extra_compile_args=['-Wconversion'] + ) + +# tseries_ext = Extension('pandas._tseries', +# depends=tseries_depends + ['pandas/src/numpy_helper.h'], +# sources=[srcpath('datetime', suffix=suffix)], +# include_dirs=[np.get_include()], +# # 
pyrex_gdb=True, +# # extra_compile_args=['-Wconversion'] +# ) + sparse_ext = Extension('pandas._sparse', sources=[srcpath('sparse', suffix=suffix)], From 5485c2dd818bda107aec940131511a651ded1d65 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 11 May 2012 16:33:08 -0400 Subject: [PATCH 006/114] BUG: test suite passes, though negative ordinals broken --- pandas/src/period.c | 43 +++++++++++++++++++++++++++--------- pandas/tests/test_tseries.py | 25 +++++++++++---------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/pandas/src/period.c b/pandas/src/period.c index 7689323e1802b..17513031581db 100644 --- a/pandas/src/period.c +++ b/pandas/src/period.c @@ -392,20 +392,29 @@ static npy_int64 asfreq_StoD(npy_int64 ordinal, char relation, asfreq_info *af_i static npy_int64 asfreq_StoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } + static npy_int64 asfreq_StoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoQ(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } + static npy_int64 asfreq_StoM(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoM(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } + static npy_int64 asfreq_StoW(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoW(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } + static npy_int64 asfreq_StoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoB(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } + static npy_int64 asfreq_StoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoB_forConvert(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static npy_int64 asfreq_StoT(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal - 1)/60 + 1; } -static npy_int64 asfreq_StoH(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal - 1)/(60*60) + 1; } + +static npy_int64 asfreq_StoT(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return ordinal / 60; +} + +static npy_int64 asfreq_StoH(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return ordinal / (60*60); +} //************ FROM MINUTELY *************** @@ -426,11 +435,17 @@ static npy_int64 asfreq_TtoB(npy_int64 ordinal, char relation, asfreq_info *af_i static npy_int64 asfreq_TtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoB_forConvert(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static npy_int64 asfreq_TtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal - 1)/60 + 1; } +static npy_int64 asfreq_TtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) { + return ordinal / 60; +} + static npy_int64 asfreq_TtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) { - if (relation == 'S') { return ordinal*60 - 59; } - else { return ordinal*60; }} + if (relation == 'S') { + return ordinal*60; } + else { + return ordinal*60 + 59; + } +} //************ FROM HOURLY *************** @@ -453,9 +468,15 @@ static npy_int64 asfreq_HtoB_forConvert(npy_int64 ordinal, char relation, asfreq // calculation works out the same as TtoS, so we just call that function for HtoT static npy_int64 asfreq_HtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) { return 
asfreq_TtoS(ordinal, relation, &NULL_AF_INFO); } + static npy_int64 asfreq_HtoS(npy_int64 ordinal, char relation, asfreq_info *af_info) { - if (relation == 'S') { return ordinal*60*60 - 60*60 + 1; } - else { return ordinal*60*60; }} + if (relation == 'S') { + return ordinal*60*60; + } + else { + return (ordinal + 1)*60*60 - 1; + } +} //************ FROM BUSINESS *************** @@ -1189,7 +1210,7 @@ char *skts_strftime(npy_int64 ordinal, int freq, PyObject *args) daily_ord = toDaily(ordinal, 'E', &af_info); abstime = get_abs_time(freq, daily_ord, ordinal); - printf("daily_ord: %d, abstime: %f \n", (int) daily_ord, abstime); + /* printf("daily_ord: %d, abstime: %f \n", (int) daily_ord, abstime); */ if(dInfoCalc_SetFromAbsDateTime(&tempDate, daily_ord + ORD_OFFSET, abstime, GREGORIAN_CALENDAR)) return NULL; diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 86c031f5e01a0..12b515cb372da 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -6,6 +6,7 @@ from pandas.util.testing import assert_almost_equal import pandas.util.testing as common import pandas._tseries as lib +import pandas._algos as algos from datetime import datetime class TestTseriesUtil(unittest.TestCase): @@ -29,7 +30,7 @@ def test_backfill(self): old = Index([1, 5, 10]) new = Index(range(12)) - filler = lib.backfill_int64(old, new) + filler = algos.backfill_int64(old, new) expect_filler = [0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1] self.assert_(np.array_equal(filler, expect_filler)) @@ -37,7 +38,7 @@ def test_backfill(self): # corner case old = Index([1, 4]) new = Index(range(5, 10)) - filler = lib.backfill_int64(old, new) + filler = algos.backfill_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] self.assert_(np.array_equal(filler, expect_filler)) @@ -46,7 +47,7 @@ def test_pad(self): old = Index([1, 5, 10]) new = Index(range(12)) - filler = lib.pad_int64(old, new) + filler = algos.pad_int64(old, new) expect_filler = [-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2] self.assert_(np.array_equal(filler, expect_filler)) @@ -54,7 +55,7 @@ def test_pad(self): # corner case old = Index([5, 10]) new = Index(range(5)) - filler = lib.pad_int64(old, new) + filler = algos.pad_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] self.assert_(np.array_equal(filler, expect_filler)) @@ -62,7 +63,7 @@ def test_left_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([2, 2, 3, 4, 4], dtype=np.int64) - result = lib.left_join_indexer_int64(b, a) + result = algos.left_join_indexer_int64(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) assert(np.array_equal(result, expected)) @@ -91,7 +92,7 @@ def test_inner_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = lib.inner_join_indexer_int64(a, b) + index, ares, bres = algos.inner_join_indexer_int64(a, b) index_exp = np.array([3, 5], dtype=np.int64) assert_almost_equal(index, index_exp) @@ -105,7 +106,7 @@ def test_outer_join_indexer(): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - index, ares, bres = lib.outer_join_indexer_int64(a, b) + index, ares, bres = algos.outer_join_indexer_int64(a, b) index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) assert_almost_equal(index, index_exp) @@ -233,25 +234,25 @@ def test_pad_backfill_object_segfault(): old = np.array([], dtype='O') new = np.array([datetime(2010, 12, 31)], dtype='O') - result = lib.pad_object(old, new) + result = algos.pad_object(old, 
new) expected = np.array([-1], dtype=np.int64) assert(np.array_equal(result, expected)) - result = lib.pad_object(new, old) + result = algos.pad_object(new, old) expected = np.array([], dtype=np.int64) assert(np.array_equal(result, expected)) - result = lib.backfill_object(old, new) + result = algos.backfill_object(old, new) expected = np.array([-1], dtype=np.int64) assert(np.array_equal(result, expected)) - result = lib.backfill_object(new, old) + result = algos.backfill_object(new, old) expected = np.array([], dtype=np.int64) assert(np.array_equal(result, expected)) def test_arrmap(): values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') - result = lib.arrmap_object(values, lambda x: x in ['foo', 'bar']) + result = algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) assert(result.dtype == np.bool_) def test_series_grouper(): From 879779dcd0e55f3db0aea5249155f25da21b6cdd Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 11:39:04 -0400 Subject: [PATCH 007/114] BUG: weekly and business daily unit support #1150 --- pandas/src/datetime.pxd | 45 +++++---- pandas/src/datetime.pyx | 13 ++- pandas/src/period.c | 144 +++++++++++++--------------- pandas/src/period.h | 6 +- pandas/tseries/tests/test_period.py | 23 +++++ 5 files changed, 128 insertions(+), 103 deletions(-) diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index ae37c3cbadefa..c16eaa7309870 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -1,6 +1,13 @@ from numpy cimport int64_t from cpython cimport PyObject + +cdef extern from "stdint.h": + enum: INT64_MIN + enum: INT32_MIN + + + cdef extern from "datetime.h": ctypedef class datetime.date [object PyDateTime_Date]: @@ -128,36 +135,32 @@ cdef extern from "period.h": ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) - int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except -1 - freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) + int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN + freq_conv_func get_asfreq_func(int fromFreq, int toFreq) void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) int64_t get_period_ordinal(int year, int month, int day, int hour, int minute, int second, - int freq) except -1 + int freq) except INT32_MIN - int64_t get_python_ordinal(int64_t period_ordinal, int freq) except -1 + int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN char *skts_strftime(int64_t value, int freq, PyObject *args) char *period_to_string(int64_t value, int freq) char *period_to_string2(int64_t value, int freq, char *fmt) - int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except -1 + int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN double getAbsTime(int, int64_t, int64_t) - int pyear(int64_t ordinal, int freq) except -1 - int pqyear(int64_t ordinal, int freq) except -1 - int pquarter(int64_t ordinal, int freq) except -1 - int pmonth(int64_t ordinal, int freq) except -1 - int pday(int64_t ordinal, int freq) except -1 - int pweekday(int64_t ordinal, int freq) except -1 - int pday_of_week(int64_t ordinal, int freq) except -1 - int pday_of_year(int64_t ordinal, int freq) except -1 - int pweek(int64_t ordinal, int freq) except -1 - int phour(int64_t ordinal, int freq) except -1 - int pminute(int64_t ordinal, int freq) except -1 - int psecond(int64_t ordinal, int freq) except -1 - -cdef extern from "stdint.h": - enum: INT64_MIN - + int 
pyear(int64_t ordinal, int freq) except INT32_MIN + int pqyear(int64_t ordinal, int freq) except INT32_MIN + int pquarter(int64_t ordinal, int freq) except INT32_MIN + int pmonth(int64_t ordinal, int freq) except INT32_MIN + int pday(int64_t ordinal, int freq) except INT32_MIN + int pweekday(int64_t ordinal, int freq) except INT32_MIN + int pday_of_week(int64_t ordinal, int freq) except INT32_MIN + int pday_of_year(int64_t ordinal, int freq) except INT32_MIN + int pweek(int64_t ordinal, int freq) except INT32_MIN + int phour(int64_t ordinal, int freq) except INT32_MIN + int pminute(int64_t ordinal, int freq) except INT32_MIN + int psecond(int64_t ordinal, int freq) except INT32_MIN diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 36e1b4cbf2600..3e4db56e4715c 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -1295,6 +1295,9 @@ cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int64_t mult1, retval = asfreq(period_ordinal, freq1, freq2, START) retval = apply_mult(retval, mult2) + if retval == INT32_MIN: + raise ValueError('Frequency conversion failed') + return retval def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int64_t mult1, @@ -1314,7 +1317,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int64_t mult1, n = len(arr) result = np.empty(n, dtype=np.int64) - func = get_asfreq_func(freq1, freq2, 0) + func = get_asfreq_func(freq1, freq2) get_asfreq_info(freq1, freq2, &finfo) if end: @@ -1368,9 +1371,9 @@ def period_ordinal_to_string(int64_t value, int freq, int64_t mult): ptr = period_to_string(remove_mult(value, mult), freq) if ptr == NULL: - raise ValueError("Could not create string from ordinal '%d'" % value) + raise ValueError("Could not create string from ordinal '%s'" % value) - return ptr + return ptr def period_strftime(int64_t value, int freq, int64_t mult, object fmt): cdef: @@ -1382,11 +1385,11 @@ def period_strftime(int64_t value, int freq, int64_t mult, object fmt): if ptr == NULL: raise ValueError("Could not create string with fmt '%s'" % fmt) - return ptr + return ptr # period accessors -ctypedef int (*accessor)(int64_t ordinal, int freq) except -1 +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN def get_period_field(int code, int64_t value, int freq, int64_t mult): diff --git a/pandas/src/period.c b/pandas/src/period.c index 17513031581db..447a183c19821 100644 --- a/pandas/src/period.c +++ b/pandas/src/period.c @@ -13,6 +13,13 @@ * Code derived from scikits.timeseries * ------------------------------------------------------------------*/ + +static int mod_compat(int x, int m) { + int result = x % m; + if (result < 0) return result + m; + return result; +} + static asfreq_info NULL_AF_INFO; /* Table with day offsets for each month (0-based, without and with leap) */ @@ -253,24 +260,24 @@ int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, // helpers for frequency conversion routines // -static npy_int64 DtoB_weekday(npy_int64 ordinal) { - return (((ordinal) / 7) * 5) + (ordinal) % 7; +static npy_int64 DtoB_weekday(npy_int64 absdate) { + return (((absdate) / 7) * 5) + (absdate) % 7 - BDAY_OFFSET; } -static npy_int64 DtoB_WeekendToMonday(npy_int64 ordinal, int day_of_week) { +static npy_int64 DtoB_WeekendToMonday(npy_int64 absdate, int day_of_week) { if (day_of_week > 4) { //change to Monday after weekend - ordinal += (7 - day_of_week); + absdate += (7 - day_of_week); } - return DtoB_weekday(ordinal); + return DtoB_weekday(absdate); } -static npy_int64 
DtoB_WeekendToFriday(npy_int64 ordinal, int day_of_week) { +static npy_int64 DtoB_WeekendToFriday(npy_int64 absdate, int day_of_week) { if (day_of_week > 4) { //change to friday before weekend - ordinal -= (day_of_week - 4); + absdate -= (day_of_week - 4); } - return DtoB_weekday(ordinal); + return DtoB_weekday(absdate); } static npy_int64 absdate_from_ymd(int y, int m, int d) { @@ -335,7 +342,7 @@ static npy_int64 asfreq_DtoM(npy_int64 ordinal, char relation, asfreq_info *af_i } static npy_int64 asfreq_DtoW(npy_int64 ordinal, char relation, asfreq_info *af_info) { - return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end))/7 + 1; + return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end))/7 + 1 - WEEK_OFFSET; } static npy_int64 asfreq_DtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { @@ -351,31 +358,16 @@ static npy_int64 asfreq_DtoB(npy_int64 ordinal, char relation, asfreq_info *af_i } } -static npy_int64 asfreq_DtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) { - - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (dinfo.day_of_week > 4) { - return INT_ERR_CODE; - } else { - return DtoB_weekday(ordinal); - } -} - // needed for getDateInfo function static npy_int64 asfreq_DtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { return ordinal; } static npy_int64 asfreq_DtoHIGHFREQ(npy_int64 ordinal, char relation, npy_int64 per_day) { - if (ordinal >= HIGHFREQ_ORIG) { - if (relation == 'S') { - return (ordinal - HIGHFREQ_ORIG) * per_day; - } - else { - return (ordinal - HIGHFREQ_ORIG + 1) * per_day - 1; - } - } else { return INT_ERR_CODE; } + if (relation == 'S') { + return ordinal * per_day; + } + else { + return (ordinal+ 1) * per_day - 1; + } } static npy_int64 asfreq_DtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) @@ -388,7 +380,7 @@ static npy_int64 asfreq_DtoS(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM SECONDLY *************** static npy_int64 asfreq_StoD(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal)/(60*60*24) + HIGHFREQ_ORIG; } + { return (ordinal)/(60*60*24); } static npy_int64 asfreq_StoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } @@ -405,8 +397,6 @@ static npy_int64 asfreq_StoW(npy_int64 ordinal, char relation, asfreq_info *af_i static npy_int64 asfreq_StoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoB(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static npy_int64 asfreq_StoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return asfreq_DtoB_forConvert(asfreq_StoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } static npy_int64 asfreq_StoT(npy_int64 ordinal, char relation, asfreq_info *af_info) { return ordinal / 60; @@ -419,7 +409,7 @@ static npy_int64 asfreq_StoH(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM MINUTELY *************** static npy_int64 asfreq_TtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return (ordinal)/(60*24) + HIGHFREQ_ORIG; } + { return (ordinal)/(60*24); } static npy_int64 asfreq_TtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } @@ -432,9 +422,6 @@ static npy_int64 asfreq_TtoW(npy_int64 ordinal, char relation, 
asfreq_info *af_i static npy_int64 asfreq_TtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoB(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static npy_int64 asfreq_TtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return asfreq_DtoB_forConvert(asfreq_TtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } - static npy_int64 asfreq_TtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) { return ordinal / 60; } @@ -450,7 +437,7 @@ static npy_int64 asfreq_TtoS(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM HOURLY *************** static npy_int64 asfreq_HtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return ordinal / 24 + HIGHFREQ_ORIG; } + { return ordinal / 24; } static npy_int64 asfreq_HtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoA(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, af_info); } static npy_int64 asfreq_HtoQ(npy_int64 ordinal, char relation, asfreq_info *af_info) @@ -462,9 +449,6 @@ static npy_int64 asfreq_HtoW(npy_int64 ordinal, char relation, asfreq_info *af_i static npy_int64 asfreq_HtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_DtoB(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } -static npy_int64 asfreq_HtoB_forConvert(npy_int64 ordinal, char relation, asfreq_info *af_info) - { return asfreq_DtoB_forConvert(asfreq_HtoD(ordinal, relation, &NULL_AF_INFO), relation, &NULL_AF_INFO); } - // calculation works out the same as TtoS, so we just call that function for HtoT static npy_int64 asfreq_HtoT(npy_int64 ordinal, char relation, asfreq_info *af_info) { return asfreq_TtoS(ordinal, relation, &NULL_AF_INFO); } @@ -482,7 +466,9 @@ static npy_int64 asfreq_HtoS(npy_int64 ordinal, char relation, asfreq_info *af_i static npy_int64 asfreq_BtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { - return ((ordinal-1)/5)*7 + (ordinal-1)%5 + 1- ORD_OFFSET; + ordinal += BDAY_OFFSET; + return (((ordinal - 1) / 5) * 7 + + mod_compat(ordinal - 1, 5) + 1 - ORD_OFFSET); } static npy_int64 asfreq_BtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) @@ -509,6 +495,7 @@ static npy_int64 asfreq_BtoS(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM WEEKLY *************** static npy_int64 asfreq_WtoD(npy_int64 ordinal, char relation, asfreq_info *af_info) { + ordinal += WEEK_OFFSET; if (relation == 'S') { return ordinal * 7 - 6 + af_info->from_week_end - ORD_OFFSET; } @@ -534,8 +521,12 @@ static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, asfreq_info *af_i asfreq_WtoD(ordinal, relation, af_info) + ORD_OFFSET, GREGORIAN_CALENDAR)) return INT_ERR_CODE; - if (relation == 'S') { return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); } - else { return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); } + if (relation == 'S') { + return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); + } + else { + return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); + } } static npy_int64 asfreq_WtoH(npy_int64 ordinal, char relation, asfreq_info *af_info) @@ -547,12 +538,6 @@ static npy_int64 asfreq_WtoS(npy_int64 ordinal, char relation, asfreq_info *af_i //************ FROM MONTHLY *************** -static int mod_compat(int x, int m) { - int result = x % m; - if (result < 0) return result + m; - return result; -} - static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { *y = ordinal / 12 + BASE_YEAR; 
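    /* With monthly ordinals anchored at January 1970 (ordinal 0 == 1970-01),
       year = ordinal / 12 + BASE_YEAR and month = (ordinal mod 12) + 1, so
       e.g. ordinal 11 -> 1970-12 and ordinal 12 -> 1971-01 (for non-negative
       ordinals; negative ordinals are still known to be broken at this point). */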
*m = mod_compat(ordinal, 12) + 1; @@ -773,7 +758,7 @@ void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) { } -freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) +freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { int fromGroup = get_freq_group(fromFreq); int toGroup = get_freq_group(toFreq); @@ -864,9 +849,7 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) case FR_QTR: return &asfreq_DtoQ; case FR_MTH: return &asfreq_DtoM; case FR_WK: return &asfreq_DtoW; - case FR_BUS: - if (forConvert) { return &asfreq_DtoB_forConvert; } - else { return &asfreq_DtoB; } + case FR_BUS: return &asfreq_DtoB; case FR_DAY: return &asfreq_DtoD; case FR_HR: return &asfreq_DtoH; case FR_MIN: return &asfreq_DtoT; @@ -881,9 +864,7 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) case FR_QTR: return &asfreq_HtoQ; case FR_MTH: return &asfreq_HtoM; case FR_WK: return &asfreq_HtoW; - case FR_BUS: - if (forConvert) { return &asfreq_HtoB_forConvert; } - else { return &asfreq_HtoB; } + case FR_BUS: return &asfreq_HtoB; case FR_DAY: return &asfreq_HtoD; case FR_HR: return &no_op; case FR_MIN: return &asfreq_HtoT; @@ -898,9 +879,7 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) case FR_QTR: return &asfreq_TtoQ; case FR_MTH: return &asfreq_TtoM; case FR_WK: return &asfreq_TtoW; - case FR_BUS: - if (forConvert) { return &asfreq_TtoB_forConvert; } - else { return &asfreq_TtoB; } + case FR_BUS: return &asfreq_TtoB; case FR_DAY: return &asfreq_TtoD; case FR_HR: return &asfreq_TtoH; case FR_MIN: return &no_op; @@ -915,9 +894,7 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) case FR_QTR: return &asfreq_StoQ; case FR_MTH: return &asfreq_StoM; case FR_WK: return &asfreq_StoW; - case FR_BUS: - if (forConvert) { return &asfreq_StoB_forConvert; } - else { return &asfreq_StoB; } + case FR_BUS: return &asfreq_StoB; case FR_DAY: return &asfreq_StoD; case FR_HR: return &asfreq_StoH; case FR_MIN: return &asfreq_StoT; @@ -930,25 +907,33 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert) double get_abs_time(int freq, npy_int64 daily_ord, npy_int64 ordinal) { - npy_int64 start_ord, per_day; - + npy_int64 start_ord, per_day, unit; switch(freq) { case FR_HR: per_day = 24; + unit = 60 * 60; break; case FR_MIN: per_day = 24*60; + unit = 60; break; case FR_SEC: per_day = 24*60*60; + unit = 1; break; default: return 0; // 24*60*60 - 1; } start_ord = asfreq_DtoHIGHFREQ(daily_ord, 'S', per_day); - return (24*60*60)*((double) (ordinal - start_ord)) / ((double) per_day); + /* printf("start_ord: %d\n", start_ord); */ + return (double) ( unit * (ordinal - start_ord)); + /* if (ordinal >= 0) { */ + /* } */ + /* else { */ + /* return (double) (unit * mod_compat(ordinal - start_ord, per_day)); */ + /* } */ } /* Sets the time part of the DateTime object. 
*/ @@ -1011,13 +996,13 @@ npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation) freq_conv_func func; asfreq_info finfo; - func = get_asfreq_func(freq1, freq2, 0); + func = get_asfreq_func(freq1, freq2); get_asfreq_info(freq1, freq2, &finfo); val = (*func)(period_ordinal, relation, &finfo); if (val == INT_ERR_CODE) { - Py_Error(PyExc_ValueError, "Unable to convert to desired frequency."); + // Py_Error(PyExc_ValueError, "Unable to convert to desired frequency."); goto onError; } return val; @@ -1039,13 +1024,13 @@ npy_int64 get_period_ordinal(int year, int month, int day, if (freq == FR_SEC) { absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET + HIGHFREQ_ORIG); + delta = (absdays - ORD_OFFSET); return (npy_int64)(delta*86400 + hour*3600 + minute*60 + second); } if (freq == FR_MIN) { absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET + HIGHFREQ_ORIG); + delta = (absdays - ORD_OFFSET); return (npy_int64)(delta*1440 + hour*60 + minute); } @@ -1054,7 +1039,7 @@ npy_int64 get_period_ordinal(int year, int month, int day, { goto onError; } - delta = (absdays - ORD_OFFSET + HIGHFREQ_ORIG); + delta = (absdays - ORD_OFFSET); return (npy_int64)(delta*24 + hour); } @@ -1075,7 +1060,7 @@ npy_int64 get_period_ordinal(int year, int month, int day, goto onError; } weeks = days / 7; - return (npy_int64)(days - weeks*2); + return (npy_int64)(days - weeks * 2) - BDAY_OFFSET; } if (freq_group == FR_WK) @@ -1086,7 +1071,7 @@ npy_int64 get_period_ordinal(int year, int month, int day, } day_adj = (7 - (freq - FR_WK)) % 7; adj_ordinal = ordinal + ((7 - day_adj) - ordinal % 7) % 7; - return adj_ordinal/7; + return adj_ordinal / 7 - WEEK_OFFSET; } if (freq == FR_MTH) @@ -1139,7 +1124,7 @@ npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) if (freq == FR_DAY) return period_ordinal + ORD_OFFSET; - toDaily = get_asfreq_func(freq, FR_DAY, 0); + toDaily = get_asfreq_func(freq, FR_DAY); get_asfreq_info(freq, FR_DAY, &af_info); return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET; } @@ -1204,12 +1189,17 @@ char *skts_strftime(npy_int64 ordinal, int freq, PyObject *args) if (!PyArg_ParseTuple(args, "s:strftime(fmt)", &orig_fmt_str)) return NULL; - toDaily = get_asfreq_func(freq, FR_DAY, 0); + toDaily = get_asfreq_func(freq, FR_DAY); get_asfreq_info(freq, FR_DAY, &af_info); daily_ord = toDaily(ordinal, 'E', &af_info); abstime = get_abs_time(freq, daily_ord, ordinal); + if (abstime < 0) { + abstime += 86400; + daily_ord -= 1; + } + /* printf("daily_ord: %d, abstime: %f \n", (int) daily_ord, abstime); */ if(dInfoCalc_SetFromAbsDateTime(&tempDate, daily_ord + ORD_OFFSET, abstime, @@ -1411,6 +1401,10 @@ int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) npy_int64 absdate = get_python_ordinal(ordinal, freq); /* printf("freq: %d, absdate: %d\n", freq, (int) absdate); */ double abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal); + if (abstime < 0) { + abstime += 86400; + absdate -= 1; + } if(dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, GREGORIAN_CALENDAR)) diff --git a/pandas/src/period.h b/pandas/src/period.h index e3bc190dd9b8e..1ece756b8fb75 100644 --- a/pandas/src/period.h +++ b/pandas/src/period.h @@ -35,6 +35,8 @@ // #define HIGHFREQ_ORIG 62135683200LL #define BASE_YEAR 1970 #define ORD_OFFSET 719163LL // days until 1970-01-01 +#define BDAY_OFFSET 513689LL // days until 1970-01-01 +#define WEEK_OFFSET 102737LL #define HIGHFREQ_ORIG 0 // ORD_OFFSET * 86400LL // days until 1970-01-01 #define 
FR_ANN 1000 /* Annual */ @@ -86,7 +88,7 @@ #define FR_UND -10000 /* Undefined */ -#define INT_ERR_CODE -1 +#define INT_ERR_CODE INT32_MIN #define MEM_CHECK(item) if (item == NULL) { return PyErr_NoMemory(); } #define ERR_CHECK(item) if (item == NULL) { return NULL; } @@ -138,7 +140,7 @@ char *period_to_string(npy_int64 value, int freq); char *period_to_string2(npy_int64 value, int freq, char *fmt); int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); -freq_conv_func get_asfreq_func(int fromFreq, int toFreq, int forConvert); +freq_conv_func get_asfreq_func(int fromFreq, int toFreq); void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info); int pyear(npy_int64 ordinal, int freq); diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 22f715e9d51ac..92441661a8cf1 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1490,9 +1490,15 @@ def test_annual(self): def test_monthly(self): self._check_freq('M', '1970-01') + def test_weekly(self): + self._check_freq('W-THU', '1970-01-01') + def test_daily(self): self._check_freq('D', '1970-01-01') + def test_business_daily(self): + self._check_freq('B', '1970-01-01') + def test_hourly(self): self._check_freq('H', '1970-01-01') @@ -1507,6 +1513,23 @@ def _check_freq(self, freq, base_date): exp = np.arange(10, dtype=np.int64) self.assert_(np.array_equal(rng.values, exp)) + def test_negone_ordinals(self): + freqs = ['A', 'M', 'Q', 'D','H', 'T', 'S'] + + period = Period(ordinal=-1, freq='D') + for freq in freqs: + repr(period.asfreq(freq)) + + for freq in freqs: + period = Period(ordinal=-1, freq=freq) + repr(period) + self.assertEquals(period.year, 1969) + + period = Period(ordinal=-1, freq='B') + repr(period) + period = Period(ordinal=-1, freq='W') + repr(period) + if __name__ == '__main__': import nose From 85fcd6935b036ea083b5471958a79c27775ecc59 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 12:02:58 -0400 Subject: [PATCH 008/114] REF: remove period multipliers, close #1199 --- pandas/src/datetime.pyx | 52 ++++++++---------------- pandas/tseries/frequencies.py | 2 +- pandas/tseries/period.py | 62 +++++++++++++++++------------ pandas/tseries/tests/test_period.py | 11 +++-- 4 files changed, 63 insertions(+), 64 deletions(-) diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 3e4db56e4715c..c481d7a020050 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -1233,7 +1233,7 @@ cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): return period_ord_w_mult * mult + 1; -def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, int64_t mult): +def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq): """ Convert array of datetime64 values (passed in as 'i8' dtype) to a set of periods corresponding to desired frequency, per period convention. @@ -1251,10 +1251,9 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, int64_t mult): PyArray_DatetimeToDatetimeStruct(dtarr[i], NPY_FR_us, &dts) out[i] = get_period_ordinal(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, freq) - out[i] = apply_mult(out[i], mult) return out -def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq, int64_t mult): +def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): """ Convert array to datetime64 values from a set of ordinals corresponding to periods per period convention. 
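For orientation, the user-visible effect of dropping the mult arguments throughout this patch is that multiplied period frequencies are rejected up front. A minimal sketch, assuming Period and period_range are importable from the top-level pandas namespace as in the tests added later in this patch:

    from pandas import Period, period_range

    Period('1989', freq='A')             # plain annual frequency still works

    try:
        Period('1989', freq='2A')        # multiplied frequencies are now rejected
    except ValueError as exc:
        print(exc)                       # "Only mult == 1 supported"

    try:
        period_range('1989Q3', periods=10, freq='2Q')
    except ValueError:
        pass                             # same restriction when building a PeriodIndex

This mirrors the test_no_multiples cases below; with multipliers gone, the ordinal math in datetime.pyx and period.c no longer has to carry a mult factor through every conversion.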
@@ -1268,15 +1267,15 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq, int64_t mult): out = np.empty(l, dtype='i8') for i in range(l): - out[i] = period_ordinal_to_dt64(periodarr[i], freq, mult) + out[i] = period_ordinal_to_dt64(periodarr[i], freq) return out cdef char START = 'S' cdef char END = 'E' -cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int64_t mult1, - int freq2, int64_t mult2, bint end): +cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, + bint end): """ Convert period ordinal from one frequency to another, and if upsampling, choose to use start ('S') or end ('E') of period. @@ -1284,24 +1283,17 @@ cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int64_t mult1, cdef: int64_t retval - period_ordinal = remove_mult(period_ordinal, mult1) - - if mult1 != 1 and end: - period_ordinal += (mult1 - 1) - if end: retval = asfreq(period_ordinal, freq1, freq2, END) else: retval = asfreq(period_ordinal, freq1, freq2, START) - retval = apply_mult(retval, mult2) if retval == INT32_MIN: raise ValueError('Frequency conversion failed') return retval -def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int64_t mult1, - int freq2, int64_t mult2, bint end): +def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): """ Convert int64-array of period ordinals from one frequency to another, and if upsampling, choose to use start ('S') or end ('E') of period. @@ -1326,32 +1318,25 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int64_t mult1, relation = START for i in range(n): - ordinal = remove_mult(arr[i], mult1) val = func(arr[i], relation, &finfo) if val == -1: raise ValueError("Unable to convert to desired frequency.") - result[i] = apply_mult(val, mult2) + result[i] = val return result -def period_ordinal(int y, int m, int d, int h, int min, int s, - int freq, int64_t mult): +def period_ordinal(int y, int m, int d, int h, int min, int s, int freq): cdef: int64_t ordinal - ordinal = get_period_ordinal(y, m, d, h, min, s, freq) + return get_period_ordinal(y, m, d, h, min, s, freq) - return apply_mult(ordinal, mult) -cpdef int64_t period_ordinal_to_dt64(int64_t period_ordinal, int freq, - int64_t mult): +cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): cdef: - int64_t ordinal npy_datetimestruct dts date_info dinfo - ordinal = remove_mult(period_ordinal, mult) - get_date_info(ordinal, freq, &dinfo) dts.year = dinfo.year @@ -1364,22 +1349,21 @@ cpdef int64_t period_ordinal_to_dt64(int64_t period_ordinal, int freq, return PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts) -def period_ordinal_to_string(int64_t value, int freq, int64_t mult): +def period_ordinal_to_string(int64_t value, int freq): cdef: char *ptr - ptr = period_to_string(remove_mult(value, mult), freq) + ptr = period_to_string(value, freq) if ptr == NULL: raise ValueError("Could not create string from ordinal '%s'" % value) return ptr -def period_strftime(int64_t value, int freq, int64_t mult, object fmt): +def period_strftime(int64_t value, int freq, object fmt): cdef: char *ptr - value = remove_mult(value, mult) ptr = period_to_string2(value, freq, fmt) if ptr == NULL: @@ -1391,13 +1375,11 @@ def period_strftime(int64_t value, int freq, int64_t mult, object fmt): ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN -def get_period_field(int code, int64_t value, int freq, - int64_t mult): +def get_period_field(int code, int64_t value, int freq): cdef accessor f = _get_accessor_func(code) - return 
f(remove_mult(value, mult), freq) + return f(value, freq) -def get_period_field_arr(int code, ndarray[int64_t] arr, - int freq, int64_t mult): +def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): cdef: Py_ssize_t i, sz ndarray[int64_t] out @@ -1409,7 +1391,7 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, out = np.empty(sz, dtype=np.int64) for i in range(sz): - out[i] = f(remove_mult(arr[i], mult), freq) + out[i] = f(arr[i], freq) return out diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e555700863dc9..705d66d84f4bf 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -68,7 +68,7 @@ def get_freq_code(freqstr): return code, stride -def _get_freq_str(base, mult): +def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if code is None: return _unknown_freq diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 11dc22cf0ac18..3d7f730af47ed 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -24,7 +24,7 @@ def _period_field_accessor(name, alias=None): alias = name def f(self): base, mult = _gfc(self.freq) - return lib.get_period_field(alias, self.ordinal, base, mult) + return lib.get_period_field(alias, self.ordinal, base) f.__name__ = name return property(f) @@ -33,7 +33,7 @@ def _field_accessor(name, alias=None): alias = name def f(self): base, mult = _gfc(self.freq) - return lib.get_period_field_arr(alias, self.values, base, mult) + return lib.get_period_field_arr(alias, self.values, base) f.__name__ = name return property(f) @@ -108,6 +108,8 @@ def __init__(self, value=None, freq=None, ordinal=None, raise ValueError("If value is None, year cannot be None") base, mult = _gfc(freq) + if mult != 1: + raise ValueError('Only mult == 1 supported') if quarter is not None: mnum = _month_numbers[_freq_mod._get_rule_month(freq)] + 1 @@ -116,7 +118,7 @@ def __init__(self, value=None, freq=None, ordinal=None, year -= 1 self.ordinal = lib.period_ordinal(year, month, day, hour, minute, - second, base, mult) + second, base) elif isinstance(value, Period): other = value @@ -163,12 +165,15 @@ def __init__(self, value=None, freq=None, ordinal=None, raise ValueError(msg) base, mult = _gfc(freq) + if mult != 1: + raise ValueError('Only mult == 1 supported') if self.ordinal is None: - self.ordinal = lib.period_ordinal(dt.year, dt.month, dt.day, dt.hour, - dt.minute, dt.second, base, mult) + self.ordinal = lib.period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + base) - self.freq = _freq_mod._get_freq_str(base, mult) + self.freq = _freq_mod._get_freq_str(base) def __eq__(self, other): if isinstance(other, Period): @@ -210,14 +215,16 @@ def asfreq(self, freq=None, how='E'): base1, mult1 = _gfc(self.freq) base2, mult2 = _gfc(freq) + if mult2 != 1: + raise ValueError('Only mult == 1 supported') + if how not in ('S', 'E'): raise ValueError('relation argument must be one of S or E') end = how == 'E' - new_ordinal = lib.period_asfreq(self.ordinal, base1, mult1, - base2, mult2, end) + new_ordinal = lib.period_asfreq(self.ordinal, base1, base2, end) - return Period(ordinal=new_ordinal, freq=(base2, mult2)) + return Period(ordinal=new_ordinal, freq=base2) @property def start_time(self): @@ -250,7 +257,11 @@ def to_timestamp(self, freq=None, how='S'): else: base, mult = _gfc(freq) new_val = self.asfreq(freq, how) - dt64 = lib.period_ordinal_to_dt64(new_val.ordinal, base, mult) + + if mult != 1: + raise ValueError('Only mult == 1 supported') + + dt64 = 
lib.period_ordinal_to_dt64(new_val.ordinal, base) ts_freq = _period_rule_to_timestamp_rule(new_val.freq, how=how) return Timestamp(dt64, offset=to_offset(ts_freq)) @@ -274,15 +285,13 @@ def now(cls, freq=None): def __repr__(self): base, mult = _gfc(self.freq) - formatted = lib.period_ordinal_to_string(self.ordinal, base, mult) + formatted = lib.period_ordinal_to_string(self.ordinal, base) freqstr = _freq_mod._reverse_period_code_map[base] - if mult == 1: - return "Period('%s', '%s')" % (formatted, freqstr) - return ("Period('%s', '%d%s')" % (formatted, mult, freqstr)) + return "Period('%s', '%s')" % (formatted, freqstr) def __str__(self): base, mult = _gfc(self.freq) - formatted = lib.period_ordinal_to_string(self.ordinal, base, mult) + formatted = lib.period_ordinal_to_string(self.ordinal, base) return ("%s" % formatted) def strftime(self, fmt): @@ -424,9 +433,9 @@ def strftime(self, fmt): """ base, mult = _gfc(self.freq) if fmt is not None: - return lib.period_strftime(self.ordinal, base, mult, fmt) + return lib.period_strftime(self.ordinal, base, fmt) else: - return lib.period_ordinal_to_string(self.ordinal, base, mult) + return lib.period_ordinal_to_string(self.ordinal, base) def _period_unbox(key, check=None): ''' @@ -465,7 +474,7 @@ def dt64arr_to_periodarr(data, freq): else: base, mult = freq - return lib.dt64arr_to_periodarr(data.view('i8'), base, mult) + return lib.dt64arr_to_periodarr(data.view('i8'), base) # --- Period index sketch @@ -589,8 +598,7 @@ def __new__(cls, data=None, else: base1, mult1 = _gfc(data.freq) base2, mult2 = _gfc(freq) - data = lib.period_asfreq_arr(data.values, base1, mult1, - base2, mult2, 1) + data = lib.period_asfreq_arr(data.values, base1, base2, 1) else: if freq is None and len(data) > 0: freq = getattr(data[0], 'freq') @@ -664,12 +672,14 @@ def asfreq(self, freq=None, how='E'): else: base2, mult2 = freq + if mult2 != 1: + raise ValueError('Only mult == 1 supported') + if how not in ('S', 'E'): raise ValueError('relation argument must be one of S or E') end = how == 'E' - new_data = lib.period_asfreq_arr(self.values, base1, mult1, - base2, mult2, end) + new_data = lib.period_asfreq_arr(self.values, base1, base2, end) result = new_data.view(PeriodIndex) result.name = self.name @@ -719,12 +729,14 @@ def to_timestamp(self, freq=None, how='start'): if freq is None: base, mult = _gfc(self.freq) new_data = self - # freq = self.freq else: base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - # freq = 'infer' - new_data = lib.periodarr_to_dt64arr(new_data.values, base, mult) + + if mult != 1: + raise ValueError('Only mult == 1 supported') + + new_data = lib.periodarr_to_dt64arr(new_data.values, base) return DatetimeIndex(new_data, freq='infer') def shift(self, n): diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 92441661a8cf1..7b2230d57ed5e 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -153,9 +153,6 @@ def test_freq_str(self): i1 = Period('1982', freq='Min') self.assert_(i1.freq[0] != '1') - i2 = Period('11/30/2005', freq='2Q') - self.assertEquals(i2.freq[0], '2') - def test_to_timestamp(self): p = Period('1982', freq='A') start_ts = p.to_timestamp(how='S') @@ -1304,6 +1301,14 @@ def test_to_period_quarterly(self): result = stamps.to_period(freq) self.assert_(rng.equals(result)) + def test_no_multiples(self): + self.assertRaises(ValueError, period_range, '1989Q3', periods=10, + freq='2Q') + + self.assertRaises(ValueError, period_range, '1989', periods=10, + 
freq='2A') + self.assertRaises(ValueError, Period, '1989', freq='2A') + # def test_iindex_multiples(self): # ii = PeriodIndex(start='1/1/10', end='12/31/12', freq='2M') # self.assertEquals(ii[0], Period('1/1/10', '2M')) From 075f05e3520c08b9f78bbab48c84a9513a26dae7 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 12:22:58 -0400 Subject: [PATCH 009/114] ENH: move _ensure_{dtype} functions to Cython for speedup, close #1221 --- pandas/core/common.py | 34 ++++--------------------- pandas/src/generate_code.py | 31 ++++++++++++++++++++++ pandas/src/generated.pyx | 51 +++++++++++++++++++++++++++++++++++++ pandas/src/tseries.pyx | 1 + pandas/src/util.pxd | 1 - 5 files changed, 88 insertions(+), 30 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index bc9873b6c8f43..8449359edf520 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -711,36 +711,12 @@ def is_float_dtype(arr_or_dtype): return issubclass(tipo, np.floating) -def _ensure_float64(arr): - if arr.dtype != np.float64: - arr = arr.astype(np.float64) - return arr - -def _ensure_int64(arr): - try: - if arr.dtype != np.int64: - arr = arr.astype(np.int64) - return arr - except AttributeError: - return np.array(arr, dtype=np.int64) +_ensure_float64 = _algos.ensure_float64 +_ensure_int64 = _algos.ensure_int64 +_ensure_int32 = _algos.ensure_int32 +_ensure_platform_int = _algos.ensure_platform_int +_ensure_object = _algos.ensure_object -def _ensure_platform_int(labels): - try: - if labels.dtype != np.int_: # pragma: no cover - labels = labels.astype(np.int_) - return labels - except AttributeError: - return np.array(labels, dtype=np.int_) - -def _ensure_int32(arr): - if arr.dtype != np.int32: - arr = arr.astype(np.int32) - return arr - -def _ensure_object(arr): - if arr.dtype != np.object_: - arr = arr.astype('O') - return arr def _astype_nansafe(arr, dtype): if (np.issubdtype(arr.dtype, np.floating) and diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 7650cdb1109da..5c3c3784f2277 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -810,6 +810,35 @@ def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, """ +# ensure_dtype functions + +ensure_dtype_template = """ +cpdef ensure_%(name)s(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == NPY_%(ctype)s: + return arr + else: + return arr.astype(np.%(dtype)s) + else: + return np.array(arr, dtype=np.%(dtype)s) + +""" + +ensure_functions = [ + ('float64', 'FLOAT64', 'float64'), + ('int32', 'INT32', 'int32'), + ('int64', 'INT64', 'int64'), + ('platform_int', 'INT', 'int_'), + ('object', 'OBJECT', 'object_'), +] + +def generate_ensure_dtypes(): + output = StringIO() + for name, ctype, dtype in ensure_functions: + filled = ensure_dtype_template % locals() + output.write(filled) + return output.getvalue() + #---------------------------------------------------------------------- # Fast "put" logic for speeding up interleaving logic @@ -916,6 +945,8 @@ def generate_take_cython_file(path='generated.pyx'): for template in nobool_1d_templates: print >> f, generate_from_template(template, exclude=['bool']) + print >> f, generate_ensure_dtypes() + # print >> f, generate_put_functions() if __name__ == '__main__': diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index 44442210b7575..96f989d8cd506 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -3306,3 +3306,54 @@ def inner_join_indexer_int64(ndarray[int64_t] left, return result, lindexer, rindexer + 
+cpdef ensure_float64(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == NPY_FLOAT64: + return arr + else: + return arr.astype(np.float64) + else: + return np.array(arr, dtype=np.float64) + + +cpdef ensure_int32(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == NPY_INT32: + return arr + else: + return arr.astype(np.int32) + else: + return np.array(arr, dtype=np.int32) + + +cpdef ensure_int64(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == NPY_INT64: + return arr + else: + return arr.astype(np.int64) + else: + return np.array(arr, dtype=np.int64) + + +cpdef ensure_platform_int(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == NPY_INT: + return arr + else: + return arr.astype(np.int_) + else: + return np.array(arr, dtype=np.int_) + + +cpdef ensure_object(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == NPY_OBJECT: + return arr + else: + return arr.astype(np.object_) + else: + return np.array(arr, dtype=np.object_) + + diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 65bc784fdbf0e..b8685a051eba3 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -665,6 +665,7 @@ def value_count_int64(ndarray[int64_t] values): return result_keys, result_counts + include "hashtable.pyx" include "datetime.pyx" include "skiplist.pyx" diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index c1c76b726a6d7..22d7c7896902c 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -60,4 +60,3 @@ cdef inline bint _checknull(object val): cdef inline bint _checknan(object val): return not cnp.PyArray_Check(val) and val != val - From ee73df1123b7d9a0ebb30c2fe667aca64c857cbc Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 12:31:20 -0400 Subject: [PATCH 010/114] DOC: doc fixes --- doc/source/indexing.rst | 1 + doc/source/timeseries.rst | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 4f8c7166e5024..2a2614eddbba7 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -611,6 +611,7 @@ As a convenience, you can pass a list of arrays directly into Series or DataFrame to construct a MultiIndex automatically: .. ipython:: python + arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']), np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])] s = Series(randn(8), index=arrays) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index e409a1a64961a..c355c2fb3f1fb 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -250,7 +250,7 @@ alias parsing is case sensitive. .. _timeseries.daterange: Generating date ranges (date_range) ----------------------------------- +----------------------------------- The ``date_range`` class utilizes these offsets (and any ones that we might add) to generate fixed-frequency date ranges: @@ -260,9 +260,9 @@ to generate fixed-frequency date ranges: start = datetime(2009, 1, 1) end = datetime(2010, 1, 1) - rng = date_range(start, end, offset=BDay()) + rng = date_range(start, end, freq=BDay()) rng - date_range(start, end, offset=BMonthEnd()) + date_range(start, end, freq=BMonthEnd()) **Business day frequency** is the default for ``date_range``. You can also strictly generate a ``date_range`` of a certain length by providing either a @@ -277,7 +277,7 @@ The start and end dates are strictly inclusive. So it will not generate any dates outside of those dates if specified. 
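To make the renamed keyword and the inclusive endpoints concrete, a short sketch (the BDay import path below is an assumption; the surrounding docs use BDay() directly):

    from datetime import datetime
    from pandas import date_range
    from pandas.core.datetools import BDay   # import path assumed, not shown in the docs

    start = datetime(2009, 1, 1)
    end = datetime(2009, 1, 9)

    rng = date_range(start, end, freq=BDay())   # 'freq' replaces the old 'offset' keyword
    rng[0], rng[-1]                             # 2009-01-01 and 2009-01-09: both endpoints are
                                                # included and nothing outside them is generated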
date_range is a valid Index -~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~ One of the main uses for ``date_range`` is as an index for pandas objects. When working with a lot of time series data, there are several reasons to use @@ -295,7 +295,7 @@ slicing, etc. .. ipython:: python - rng = date_range(start, end, offset=BMonthEnd()) + rng = date_range(start, end, freq=BMonthEnd()) ts = Series(randn(len(rng)), index=rng) ts.index ts[:5].index @@ -339,8 +339,8 @@ rule `: .. ipython:: python - ts.shift(5, offset=datetools.bday) - ts.shift(5, offset='EOM') + ts.shift(5, freq=datetools.bday) + ts.shift(5, freq='EOM') Frequency conversion ~~~~~~~~~~~~~~~~~~~~ @@ -351,7 +351,7 @@ generates a ``date_range`` and calls ``reindex``. .. ipython:: python - dr = date_range('1/1/2010', periods=3, offset=3 * datetools.bday) + dr = date_range('1/1/2010', periods=3, freq=3 * datetools.bday) ts = Series(randn(3), index=dr) ts ts.asfreq(BDay()) @@ -377,9 +377,9 @@ view) application of GroupBy. Carry out the following steps: .. code-block:: python - dr1hour = date_range(start, end, offset=Hour()) - dr5day = date_range(start, end, offset=5 * datetools.day) - dr10day = date_range(start, end, offset=10 * datetools.day) + dr1hour = date_range(start, end, freq=Hour()) + dr5day = date_range(start, end, freq=5 * datetools.day) + dr10day = date_range(start, end, freq=10 * datetools.day) 2. Use the ``asof`` function ("as of") of the date_range to do a groupby @@ -396,11 +396,11 @@ Here is a fully-worked example: # some minutely data minutely = date_range('1/3/2000 00:00:00', '1/3/2000 12:00:00', - offset=datetools.Minute()) + freq=datetools.Minute()) ts = Series(randn(len(minutely)), index=minutely) ts.index - hourly = date_range('1/3/2000', '1/4/2000', offset=datetools.Hour()) + hourly = date_range('1/3/2000', '1/4/2000', freq=datetools.Hour()) grouped = ts.groupby(hourly.asof) grouped.mean() From 9e88e0cbdc579d83cea5fbf033844812d6659bce Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 12:53:07 -0400 Subject: [PATCH 011/114] ENH: handle dict return values and vbench, close #823 --- pandas/core/groupby.py | 6 +++++- pandas/tests/test_groupby.py | 13 +++++++++++++ vb_suite/groupby.py | 11 +++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 4b4d7a8581f65..0c1e580c5bbc4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1213,7 +1213,11 @@ def _get_index(): index = Index(keys, name=self.grouper.names[0]) return index - if isinstance(values[0], Series): + if isinstance(values[0], dict): + # # GH #823 + return DataFrame(values, index=keys).stack() + + if isinstance(values[0], (Series, dict)): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) elif isinstance(values[0], DataFrame): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index fda572ebccf81..f7aba1ecfd523 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1701,6 +1701,19 @@ def test_multifunc_sum_bug(self): result = grouped.agg({'fl':'sum',2:'size'}) self.assert_(result['fl'].dtype == np.float64) + def test_handle_dict_return_value(self): + def f(group): + return {'min': group.min(), 'max': group.max()} + + def g(group): + return Series({'min': group.min(), 'max': group.max()}) + + result = self.df.groupby('A')['C'].apply(f) + expected = self.df.groupby('A')['C'].apply(g) + + self.assert_(isinstance(result, Series)) + assert_series_equal(result, expected) + def 
_check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = map(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index f8e5790796bbb..f5d8ababfc17f 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -144,3 +144,14 @@ def f(): groupby_pivot_table = Benchmark(stmt, setup, start_date=datetime(2011, 12, 15)) +#---------------------------------------------------------------------- +# dict return values + +setup = common_setup + """ +labels = np.arange(1000).repeat(10) +data = Series(randn(len(labels))) +f = lambda x: {'first': x.values[0], 'last': x.values[-1]} +""" + +groupby_apply_dict_return = Benchmark('data.groupby(labels).apply(f)', + setup, start_date=datetime(2011, 12, 15)) From a31ed384107acf9027b25797c342cd97fc56359b Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 13:01:51 -0400 Subject: [PATCH 012/114] ENH: add is_full method to PeriodIndex close #1114 --- pandas/tseries/period.py | 13 +++++++++++++ pandas/tseries/tests/test_period.py | 17 +++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 3d7f730af47ed..a662c35396448 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -656,6 +656,19 @@ def __iter__(self): def is_all_dates(self): return True + @property + def is_full(self): + """ + Returns True if there are any missing periods from start to end + """ + if len(self) == 0: + return True + if not self.is_monotonic: + raise ValueError('Index is not monotonic') + values = self.values + return ((values[1:] - values[:-1]) < 2).all() + + @property def freqstr(self): return self.freq diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 7b2230d57ed5e..1842a6f9bbbf0 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1464,6 +1464,23 @@ def _check_field(self, periodindex, fieldname): for x, val in zip(periodindex, field_idx): assert_equal(getattr(x, fieldname), val) + def test_is_full(self): + index = PeriodIndex([2005, 2007, 2009], freq='A') + self.assert_(not index.is_full) + + index = PeriodIndex([2005, 2006, 2007], freq='A') + self.assert_(index.is_full) + + index = PeriodIndex([2005, 2005, 2007], freq='A') + self.assert_(not index.is_full) + + index = PeriodIndex([2005, 2005, 2006], freq='A') + self.assert_(index.is_full) + + index = PeriodIndex([2006, 2005, 2005], freq='A') + self.assertRaises(ValueError, getattr, index, 'is_full') + + self.assert_(index[:0].is_full) def _permute(obj): return obj.take(np.random.permutation(len(obj))) From b457ff8c246c382baa74b8f0e916111342305681 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 7 May 2012 14:12:47 -0500 Subject: [PATCH 013/114] Remove dependencies on details of experimental numpy datetime64 ABI Pandas was using some of the enums and structures exposed by its headers. By creating its own local copies of these, it is possible to allow the numpy ABI to be improved while in its experimental state. 
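Before the C-level changes below, a short usage sketch of the PeriodIndex.is_full property added two patches earlier, mirroring its tests: it is True only when no periods are missing between the first and last element (duplicates are allowed), and it requires a monotonic index:

    from pandas import PeriodIndex

    PeriodIndex([2005, 2006, 2007], freq='A').is_full   # True: consecutive years, no gaps
    PeriodIndex([2005, 2005, 2006], freq='A').is_full   # True: duplicates do not count as gaps
    PeriodIndex([2005, 2007, 2009], freq='A').is_full   # False: 2006 and 2008 are missing

    try:
        PeriodIndex([2006, 2005, 2005], freq='A').is_full
    except ValueError:
        pass                         # a non-monotonic index raises instead of guessing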
--- pandas/src/datetime.pxd | 68 +++++----- pandas/src/datetime.pyx | 89 ++++++------- pandas/src/np_datetime.c | 130 +++++++++++-------- pandas/src/np_datetime.h | 65 +++++++--- pandas/src/np_datetime_strings.c | 210 +++++++++++++++---------------- pandas/src/np_datetime_strings.h | 14 +-- 6 files changed, 312 insertions(+), 264 deletions(-) diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index c16eaa7309870..ed56029b6ef0f 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -42,26 +42,6 @@ cdef extern from "numpy/ndarrayobject.h": ctypedef int64_t npy_timedelta ctypedef int64_t npy_datetime - ctypedef struct npy_datetimestruct: - int64_t year - int month, day, hour, min, sec, us, ps, as - - ctypedef enum NPY_DATETIMEUNIT: - #NPY_FR_Y - #NPY_FR_M - #NPY_FR_W - #NPY_FR_B - #NPY_FR_D - #NPY_FR_h - #NPY_FR_m - #NPY_FR_s - #NPY_FR_ms - NPY_FR_us - #NPY_FR_ns - #NPY_FR_ps - #NPY_FR_fs - #NPY_FR_as - ctypedef enum NPY_CASTING: NPY_NO_CASTING NPY_EQUIV_CASTING @@ -69,13 +49,6 @@ cdef extern from "numpy/ndarrayobject.h": NPY_SAME_KIND_CASTING NPY_UNSAFE_CASTING - npy_datetime PyArray_DatetimeStructToDatetime(NPY_DATETIMEUNIT fr, - npy_datetimestruct *d) - - void PyArray_DatetimeToDatetimeStruct(npy_datetime val, - NPY_DATETIMEUNIT fr, - npy_datetimestruct *result) - cdef extern from "numpy_helper.h": npy_datetime unbox_datetime64_scalar(object o) @@ -85,9 +58,32 @@ cdef extern from "numpy/npy_common.h": cdef extern from "np_datetime.h": - int convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, + ctypedef enum PANDAS_DATETIMEUNIT: + PANDAS_FR_Y + PANDAS_FR_M + PANDAS_FR_W + PANDAS_FR_D + PANDAS_FR_B + PANDAS_FR_h + PANDAS_FR_m + PANDAS_FR_s + PANDAS_FR_ms + PANDAS_FR_us + PANDAS_FR_ns + PANDAS_FR_ps + PANDAS_FR_fs + PANDAS_FR_as + + ctypedef struct pandas_datetimestruct: + int64_t year + int month, day, hour, min, sec, us, ps, as + + int convert_pydatetime_to_datetimestruct(PyObject *obj, pandas_datetimestruct *out, + PANDAS_DATETIMEUNIT *out_bestunit, int apply_tzinfo) + + npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) + void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) int _days_per_month_table[2][12] int dayofweek(int y, int m, int d) @@ -95,18 +91,18 @@ cdef extern from "np_datetime.h": cdef extern from "np_datetime_strings.h": - int parse_iso_8601_datetime(char *str, int len, NPY_DATETIMEUNIT unit, - NPY_CASTING casting, npy_datetimestruct *out, - npy_bool *out_local, NPY_DATETIMEUNIT *out_bestunit, + int parse_iso_8601_datetime(char *str, int len, PANDAS_DATETIMEUNIT unit, + NPY_CASTING casting, pandas_datetimestruct *out, + npy_bool *out_local, PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special) - int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int local, NPY_DATETIMEUNIT base, int tzoffset, + int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, + int local, PANDAS_DATETIMEUNIT base, int tzoffset, NPY_CASTING casting) - int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) + int get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) - # int parse_python_string(object obj, npy_datetimestruct *out) except -1 + # int parse_python_string(object obj, pandas_datetimestruct *out) except -1 cdef extern from "period.h": ctypedef struct date_info: diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx 
index c481d7a020050..5988179eb4371 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -12,9 +12,12 @@ from util cimport is_integer_object, is_datetime64_object from dateutil.parser import parse as parse_date cimport util +from khash cimport * +import cython + # initialize numpy import_array() -import_ufunc() +#import_ufunc() # import datetime C API PyDateTime_IMPORT @@ -220,7 +223,7 @@ cdef class _Timestamp(datetime): # lightweight C object to hold datetime & int64 pair cdef class _TSObject: cdef: - npy_datetimestruct dts # npy_datetimestruct + pandas_datetimestruct dts # pandas_datetimestruct int64_t value # numpy dt64 object tzinfo @@ -247,13 +250,13 @@ cpdef convert_to_tsobject(object ts, object tz=None): if is_datetime64_object(ts): obj.value = unbox_datetime64_scalar(ts) - PyArray_DatetimeToDatetimeStruct(obj.value, NPY_FR_us, &obj.dts) + pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_us, &obj.dts) elif is_integer_object(ts): obj.value = ts - PyArray_DatetimeToDatetimeStruct(ts, NPY_FR_us, &obj.dts) + pandas_datetime_to_datetimestruct(ts, PANDAS_FR_us, &obj.dts) elif util.is_string_object(ts): _string_to_dts(ts, &obj.dts) - obj.value = PyArray_DatetimeStructToDatetime(NPY_FR_us, &obj.dts) + obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_us, &obj.dts) elif PyDateTime_Check(ts): obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -277,7 +280,7 @@ cpdef convert_to_tsobject(object ts, object tz=None): obj.value = obj.value + deltas[pos] if utc_convert: - PyArray_DatetimeToDatetimeStruct(obj.value, NPY_FR_us, + pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_us, &obj.dts) obj.tzinfo = tz._tzinfos[inf] @@ -293,16 +296,16 @@ cpdef convert_to_tsobject(object ts, object tz=None): # obj.dtval = _dts_to_pydatetime(&obj.dts) cdef inline object _datetime64_to_datetime(int64_t val): - cdef npy_datetimestruct dts - PyArray_DatetimeToDatetimeStruct(val, NPY_FR_us, &dts) + cdef pandas_datetimestruct dts + pandas_datetime_to_datetimestruct(val, PANDAS_FR_us, &dts) return _dts_to_pydatetime(&dts) -cdef inline object _dts_to_pydatetime(npy_datetimestruct *dts): +cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): return PyDateTime_FromDateAndTime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) -cdef inline int64_t _pydatetime_to_dts(object val, npy_datetimestruct *dts): +cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) dts.day = PyDateTime_GET_DAY(val) @@ -310,10 +313,10 @@ cdef inline int64_t _pydatetime_to_dts(object val, npy_datetimestruct *dts): dts.min = PyDateTime_DATE_GET_MINUTE(val) dts.sec = PyDateTime_DATE_GET_SECOND(val) dts.us = PyDateTime_DATE_GET_MICROSECOND(val) - return PyArray_DatetimeStructToDatetime(NPY_FR_us, dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts) cdef inline int64_t _dtlike_to_datetime64(object val, - npy_datetimestruct *dts): + pandas_datetimestruct *dts): dts.year = val.year dts.month = val.month dts.day = val.day @@ -321,10 +324,10 @@ cdef inline int64_t _dtlike_to_datetime64(object val, dts.min = val.minute dts.sec = val.second dts.us = val.microsecond - return PyArray_DatetimeStructToDatetime(NPY_FR_us, dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts) cdef inline int64_t _date_to_datetime64(object val, - npy_datetimestruct *dts): + pandas_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) 
dts.day = PyDateTime_GET_DAY(val) @@ -332,17 +335,17 @@ cdef inline int64_t _date_to_datetime64(object val, dts.min = 0 dts.sec = 0 dts.us = 0 - return PyArray_DatetimeStructToDatetime(NPY_FR_us, dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts) -cdef inline int _string_to_dts(object val, npy_datetimestruct* dts) except -1: +cdef inline int _string_to_dts(object val, pandas_datetimestruct* dts) except -1: cdef: npy_bool islocal, special - NPY_DATETIMEUNIT out_bestunit + PANDAS_DATETIMEUNIT out_bestunit if PyUnicode_Check(val): val = PyUnicode_AsASCIIString(val); - parse_iso_8601_datetime(val, len(val), NPY_FR_us, NPY_UNSAFE_CASTING, + parse_iso_8601_datetime(val, len(val), PANDAS_FR_us, NPY_UNSAFE_CASTING, dts, &islocal, &out_bestunit, &special) return 0 @@ -741,12 +744,12 @@ def string_to_datetime(ndarray[object] strings, raise_=False, dayfirst=False): for i in range(n): val = strings[i] if util._checknull(val): - result[i] = NaT + result[i] = 'NaT' elif PyDateTime_Check(val): result[i] = val else: if len(val) == 0: - result[i] = NaT + result[i] = 'NaT' continue try: result[i] = parse(val, dayfirst=dayfirst) @@ -762,7 +765,7 @@ def string_to_datetime(ndarray[object] strings, raise_=False, dayfirst=False): oresult[i] = val else: if len(val) == 0: - oresult[i] = NaT + oresult[i] = 'NaT' continue try: oresult[i] = parse(val, dayfirst=dayfirst) @@ -983,7 +986,7 @@ def build_field_sarray(ndarray[int64_t] dtindex): cdef: Py_ssize_t i, count = 0 int isleap - npy_datetimestruct dts + pandas_datetimestruct dts ndarray[int32_t] years, months, days, hours, minutes, seconds, mus count = len(dtindex) @@ -1007,7 +1010,7 @@ def build_field_sarray(ndarray[int64_t] dtindex): mus = out['u'] for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) years[i] = dts.year months[i] = dts.month days[i] = dts.day @@ -1030,7 +1033,7 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): ndarray[int32_t] out ndarray[int32_t, ndim=2] _month_offset int isleap - npy_datetimestruct dts + pandas_datetimestruct dts _month_offset = np.array( [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], @@ -1042,49 +1045,49 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): if field == 'Y': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.year return out elif field == 'M': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.month return out elif field == 'D': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.day return out elif field == 'h': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.hour return out elif field == 'm': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.min return out elif field == 's': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.sec return out elif field == 
'us': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.us return out elif field == 'doy': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) isleap = is_leapyear(dts.year) out[i] = _month_offset[isleap, dts.month-1] + dts.day return out @@ -1097,7 +1100,7 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): elif field == 'woy': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) isleap = is_leapyear(dts.year) out[i] = _month_offset[isleap, dts.month - 1] + dts.day out[i] = ((out[i] - 1) / 7) + 1 @@ -1105,7 +1108,7 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): elif field == 'q': for i in range(count): - PyArray_DatetimeToDatetimeStruct(dtindex[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) out[i] = dts.month out[i] = ((out[i] - 1) / 3) + 1 return out @@ -1165,25 +1168,25 @@ def date_normalize(ndarray[int64_t] stamps): cdef: Py_ssize_t i, n = len(stamps) ndarray[int64_t] result = np.empty(n, dtype=np.int64) - npy_datetimestruct dts + pandas_datetimestruct dts for i in range(n): - PyArray_DatetimeToDatetimeStruct(stamps[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_us, &dts) dts.hour = 0 dts.min = 0 dts.sec = 0 dts.us = 0 - result[i] = PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts) + result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_us, &dts) return result def dates_normalized(ndarray[int64_t] stamps): cdef: Py_ssize_t i, n = len(stamps) - npy_datetimestruct dts + pandas_datetimestruct dts for i in range(n): - PyArray_DatetimeToDatetimeStruct(stamps[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_us, &dts) if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False @@ -1241,14 +1244,14 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq): cdef: ndarray[int64_t] out Py_ssize_t i, l - npy_datetimestruct dts + pandas_datetimestruct dts l = len(dtarr) out = np.empty(l, dtype='i8') for i in range(l): - PyArray_DatetimeToDatetimeStruct(dtarr[i], NPY_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_us, &dts) out[i] = get_period_ordinal(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, freq) return out @@ -1334,7 +1337,7 @@ def period_ordinal(int y, int m, int d, int h, int min, int s, int freq): cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): cdef: - npy_datetimestruct dts + pandas_datetimestruct dts date_info dinfo get_date_info(ordinal, freq, &dinfo) @@ -1347,7 +1350,7 @@ cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): dts.sec = int(dinfo.second) dts.us = 0 - return PyArray_DatetimeStructToDatetime(NPY_FR_us, &dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_us, &dts) def period_ordinal_to_string(int64_t value, int freq): cdef: diff --git a/pandas/src/np_datetime.c b/pandas/src/np_datetime.c index 521f964cf86db..6b238b87f0a9b 100644 --- a/pandas/src/np_datetime.c +++ b/pandas/src/np_datetime.c @@ -63,7 +63,7 @@ int dayofweek(int y, int m, int d) * the current values are valid. 
*/ void -add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) +add_minutes_to_datetimestruct(pandas_datetimestruct *dts, int minutes) { int isleap; @@ -115,7 +115,7 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) * Calculates the days offset from the 1970 epoch. */ npy_int64 -get_datetimestruct_days(const npy_datetimestruct *dts) +get_datetimestruct_days(const pandas_datetimestruct *dts) { int i, month; npy_int64 year, days = 0; @@ -221,7 +221,7 @@ days_to_yearsdays(npy_int64 *days_) * the current values are valid. */ NPY_NO_EXPORT void -add_seconds_to_datetimestruct(npy_datetimestruct *dts, int seconds) +add_seconds_to_datetimestruct(pandas_datetimestruct *dts, int seconds) { int minutes; @@ -247,7 +247,7 @@ add_seconds_to_datetimestruct(npy_datetimestruct *dts, int seconds) * offset from 1970. */ static void -set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) +set_datetimestruct_days(npy_int64 days, pandas_datetimestruct *dts) { int *month_lengths, i; @@ -269,7 +269,7 @@ set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) /* * * Tests for and converts a Python datetime.datetime or datetime.date - * object into a NumPy npy_datetimestruct. + * object into a NumPy pandas_datetimestruct. * * While the C API has PyDate_* and PyDateTime_* functions, the following * implementation just asks for attributes, and thus supports @@ -286,15 +286,15 @@ set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) * if obj doesn't have the neeeded date or datetime attributes. */ int -convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, +convert_pydatetime_to_datetimestruct(PyObject *obj, pandas_datetimestruct *out, + PANDAS_DATETIMEUNIT *out_bestunit, int apply_tzinfo) { PyObject *tmp; int isleap; /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); + memset(out, 0, sizeof(pandas_datetimestruct)); out->month = 1; out->day = 1; @@ -358,7 +358,7 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, !PyObject_HasAttrString(obj, "microsecond")) { /* The best unit for date is 'D' */ if (out_bestunit != NULL) { - *out_bestunit = NPY_FR_D; + *out_bestunit = PANDAS_FR_D; } return 0; } @@ -463,7 +463,7 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, /* The resolution of Python's datetime is 'us' */ if (out_bestunit != NULL) { - *out_bestunit = NPY_FR_us; + *out_bestunit = PANDAS_FR_us; } return 0; @@ -482,6 +482,28 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, return -1; } +npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) +{ + pandas_datetime_metadata meta; + npy_datetime result = PANDAS_DATETIME_NAT; + + meta.base = fr; + meta.num = 1; + + convert_datetimestruct_to_datetime(&meta, d, &result); + return result; +} + +void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, npy_datetimestruct *result) +{ + pandas_datetime_metadata meta; + + meta.base = fr; + meta.num = 1; + + convert_datetime_to_datetimestruct(&meta, val, result); +} + /* * Converts a datetime from a datetimestruct to a datetime based * on some metadata. The date is assumed to be valid. @@ -491,18 +513,18 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, * Returns 0 on success, -1 on failure. 
*/ int -convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, - const npy_datetimestruct *dts, +convert_datetimestruct_to_datetime(pandas_datetime_metadata *meta, + const pandas_datetimestruct *dts, npy_datetime *out) { npy_datetime ret; - NPY_DATETIMEUNIT base = meta->base; + PANDAS_DATETIMEUNIT base = meta->base; - if (base == NPY_FR_Y) { + if (base == PANDAS_FR_Y) { /* Truncate to the year */ ret = dts->year - 1970; } - else if (base == NPY_FR_M) { + else if (base == PANDAS_FR_M) { /* Truncate to the month */ ret = 12 * (dts->year - 1970) + (dts->month - 1); } @@ -511,7 +533,7 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, npy_int64 days = get_datetimestruct_days(dts); switch (base) { - case NPY_FR_W: + case PANDAS_FR_W: /* Truncate to weeks */ if (days >= 0) { ret = days / 7; @@ -520,39 +542,39 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, ret = (days - 6) / 7; } break; - case NPY_FR_D: + case PANDAS_FR_D: ret = days; break; - case NPY_FR_h: + case PANDAS_FR_h: ret = days * 24 + dts->hour; break; - case NPY_FR_m: + case PANDAS_FR_m: ret = (days * 24 + dts->hour) * 60 + dts->min; break; - case NPY_FR_s: + case PANDAS_FR_s: ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; break; - case NPY_FR_ms: + case PANDAS_FR_ms: ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * 1000 + dts->us / 1000; break; - case NPY_FR_us: + case PANDAS_FR_us: ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec) * 1000000 + dts->us; break; - case NPY_FR_ns: + case PANDAS_FR_ns: ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + @@ -560,7 +582,7 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, dts->us) * 1000 + dts->ps / 1000; break; - case NPY_FR_ps: + case PANDAS_FR_ps: ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + @@ -568,7 +590,7 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, dts->us) * 1000000 + dts->ps; break; - case NPY_FR_fs: + case PANDAS_FR_fs: /* only 2.6 hours */ ret = (((((days * 24 + dts->hour) * 60 + @@ -578,7 +600,7 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, dts->ps) * 1000 + dts->as / 1000; break; - case NPY_FR_as: + case PANDAS_FR_as: /* only 9.2 secs */ ret = (((((days * 24 + dts->hour) * 60 + @@ -619,8 +641,8 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, * months units, and all the other units. */ npy_bool -can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit, - NPY_DATETIMEUNIT dst_unit, +can_cast_timedelta64_units(PANDAS_DATETIMEUNIT src_unit, + PANDAS_DATETIMEUNIT dst_unit, NPY_CASTING casting) { switch (casting) { @@ -633,8 +655,8 @@ can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit, * 'same_kind' casting. 
*/ case NPY_SAME_KIND_CASTING: - return (src_unit <= NPY_FR_M && dst_unit <= NPY_FR_M) || - (src_unit > NPY_FR_M && dst_unit > NPY_FR_M); + return (src_unit <= PANDAS_FR_M && dst_unit <= PANDAS_FR_M) || + (src_unit > PANDAS_FR_M && dst_unit > PANDAS_FR_M); /* * Enforce the 'date units' vs 'time units' barrier and that @@ -643,8 +665,8 @@ can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit, */ case NPY_SAFE_CASTING: return (src_unit <= dst_unit) && - ((src_unit <= NPY_FR_M && dst_unit <= NPY_FR_M) || - (src_unit > NPY_FR_M && dst_unit > NPY_FR_M)); + ((src_unit <= PANDAS_FR_M && dst_unit <= PANDAS_FR_M) || + (src_unit > PANDAS_FR_M && dst_unit > PANDAS_FR_M)); /* Enforce equality with 'no' or 'equiv' casting */ default: @@ -659,8 +681,8 @@ can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit, * for all but 'unsafe' casting. */ npy_bool -can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit, - NPY_DATETIMEUNIT dst_unit, +can_cast_datetime64_units(PANDAS_DATETIMEUNIT src_unit, + PANDAS_DATETIMEUNIT dst_unit, NPY_CASTING casting) { switch (casting) { @@ -673,8 +695,8 @@ can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit, * 'same_kind' casting. */ case NPY_SAME_KIND_CASTING: - return (src_unit <= NPY_FR_D && dst_unit <= NPY_FR_D) || - (src_unit > NPY_FR_D && dst_unit > NPY_FR_D); + return (src_unit <= PANDAS_FR_D && dst_unit <= PANDAS_FR_D) || + (src_unit > PANDAS_FR_D && dst_unit > PANDAS_FR_D); /* * Enforce the 'date units' vs 'time units' barrier and that @@ -683,8 +705,8 @@ can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit, */ case NPY_SAFE_CASTING: return (src_unit <= dst_unit) && - ((src_unit <= NPY_FR_D && dst_unit <= NPY_FR_D) || - (src_unit > NPY_FR_D && dst_unit > NPY_FR_D)); + ((src_unit <= PANDAS_FR_D && dst_unit <= PANDAS_FR_D) || + (src_unit > PANDAS_FR_D && dst_unit > PANDAS_FR_D)); /* Enforce equality with 'no' or 'equiv' casting */ default: @@ -696,14 +718,14 @@ can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit, * Converts a datetime based on the given metadata into a datetimestruct */ int -convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, +convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, npy_datetime dt, - npy_datetimestruct *out) + pandas_datetimestruct *out) { npy_int64 perday; /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); + memset(out, 0, sizeof(pandas_datetimestruct)); out->year = 1970; out->month = 1; out->day = 1; @@ -716,11 +738,11 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, * for negative values. 
*/ switch (meta->base) { - case NPY_FR_Y: + case PANDAS_FR_Y: out->year = 1970 + dt; break; - case NPY_FR_M: + case PANDAS_FR_M: if (dt >= 0) { out->year = 1970 + dt / 12; out->month = dt % 12 + 1; @@ -731,16 +753,16 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, } break; - case NPY_FR_W: + case PANDAS_FR_W: /* A week is 7 days */ set_datetimestruct_days(dt * 7, out); break; - case NPY_FR_D: + case PANDAS_FR_D: set_datetimestruct_days(dt, out); break; - case NPY_FR_h: + case PANDAS_FR_h: perday = 24LL; if (dt >= 0) { @@ -754,7 +776,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->hour = dt; break; - case NPY_FR_m: + case PANDAS_FR_m: perday = 24LL * 60; if (dt >= 0) { @@ -769,7 +791,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->min = dt % 60; break; - case NPY_FR_s: + case PANDAS_FR_s: perday = 24LL * 60 * 60; if (dt >= 0) { @@ -785,7 +807,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->sec = dt % 60; break; - case NPY_FR_ms: + case PANDAS_FR_ms: perday = 24LL * 60 * 60 * 1000; if (dt >= 0) { @@ -802,7 +824,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->us = (dt % 1000LL) * 1000; break; - case NPY_FR_us: + case PANDAS_FR_us: perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; if (dt >= 0) { @@ -819,7 +841,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->us = dt % 1000000LL; break; - case NPY_FR_ns: + case PANDAS_FR_ns: perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; if (dt >= 0) { @@ -837,7 +859,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->ps = (dt % 1000LL) * 1000; break; - case NPY_FR_ps: + case PANDAS_FR_ps: perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; if (dt >= 0) { @@ -855,7 +877,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, out->ps = dt % 1000000LL; break; - case NPY_FR_fs: + case PANDAS_FR_fs: /* entire range is only +- 2.6 hours */ if (dt >= 0) { out->hour = dt / (60*60*1000000000000000LL); @@ -883,7 +905,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, } break; - case NPY_FR_as: + case PANDAS_FR_as: /* entire range is only +- 9.2 seconds */ if (dt >= 0) { out->sec = (dt / 1000000000000000000LL) % 60; diff --git a/pandas/src/np_datetime.h b/pandas/src/np_datetime.h index 29598e9262b21..ca96201d3b1a6 100644 --- a/pandas/src/np_datetime.h +++ b/pandas/src/np_datetime.h @@ -6,15 +6,50 @@ #ifndef _PANDAS_DATETIME_H_ #define _PANDAS_DATETIME_H_ -#define NPY_DATETIME_MAX_ISO8601_STRLEN (21+3*5+1+3*6+6+1) +typedef enum { + PANDAS_FR_Y, /* Years */ + PANDAS_FR_M, /* Months */ + PANDAS_FR_W, /* Weeks */ + PANDAS_FR_D, /* Days */ + PANDAS_FR_B, /* Business days */ + PANDAS_FR_h, /* hours */ + PANDAS_FR_m, /* minutes */ + PANDAS_FR_s, /* seconds */ + PANDAS_FR_ms,/* milliseconds */ + PANDAS_FR_us,/* microseconds */ + PANDAS_FR_ns,/* nanoseconds */ + PANDAS_FR_ps,/* picoseconds */ + PANDAS_FR_fs,/* femtoseconds */ + PANDAS_FR_as,/* attoseconds */ +} PANDAS_DATETIMEUNIT; + +#define PANDAS_DATETIME_NUMUNITS 14 + +#define PANDAS_DATETIME_MAX_ISO8601_STRLEN (21+3*5+1+3*6+6+1) + +#define PANDAS_DATETIME_NAT NPY_MIN_INT64 + +typedef struct { + npy_int64 year; + npy_int32 month, day, hour, min, sec, us, ps, as; +} pandas_datetimestruct; + +typedef struct { + PANDAS_DATETIMEUNIT base; + int num; +} pandas_datetime_metadata; // stuff pandas needs // ---------------------------------------------------------------------------- -int 
convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, +int convert_pydatetime_to_datetimestruct(PyObject *obj, pandas_datetimestruct *out, + PANDAS_DATETIMEUNIT *out_bestunit, int apply_tzinfo); +npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d); + +void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, npy_datetimestruct *result); + int dayofweek(int y, int m, int d); static int _days_per_month_table[2][12] = { @@ -22,7 +57,7 @@ static int _days_per_month_table[2][12] = { { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } }; -// stuff numpy needs in header +// stuff numpy-derived code needs in header // ---------------------------------------------------------------------------- int is_leapyear(npy_int64 year); @@ -36,22 +71,22 @@ int is_leapyear(npy_int64 year); * Returns 0 on success, -1 on failure. */ int -convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta, - const npy_datetimestruct *dts, +convert_datetimestruct_to_datetime(pandas_datetime_metadata *meta, + const pandas_datetimestruct *dts, npy_datetime *out); /* * Calculates the days offset from the 1970 epoch. */ npy_int64 -get_datetimestruct_days(const npy_datetimestruct *dts); +get_datetimestruct_days(const pandas_datetimestruct *dts); /* * Adjusts a datetimestruct based on a minutes offset. Assumes * the current values are valid. */ void -add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); +add_minutes_to_datetimestruct(pandas_datetimestruct *dts, int minutes); /* * This provides the casting rules for the TIMEDELTA data type units. @@ -60,19 +95,21 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); * months units, and all the other units. */ //npy_bool -//can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit, -// NPY_DATETIMEUNIT dst_unit, +//can_cast_timedelta64_units(PANDAS_DATETIMEUNIT src_unit, +// PANDAS_DATETIMEUNIT dst_unit, // NPY_CASTING casting); npy_bool -can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit, - NPY_DATETIMEUNIT dst_unit, +can_cast_datetime64_units(PANDAS_DATETIMEUNIT src_unit, + PANDAS_DATETIMEUNIT dst_unit, NPY_CASTING casting); int -convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta, +convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, npy_datetime dt, - npy_datetimestruct *out); + pandas_datetimestruct *out); + + #endif diff --git a/pandas/src/np_datetime_strings.c b/pandas/src/np_datetime_strings.c index 1224ce16c953c..15ea534935c9e 100644 --- a/pandas/src/np_datetime_strings.c +++ b/pandas/src/np_datetime_strings.c @@ -57,21 +57,21 @@ typedef time_t NPY_TIME_T; /*}*/ /* Exported as DATETIMEUNITS in multiarraymodule.c */ -static char *_datetime_strings[NPY_DATETIME_NUMUNITS] = { - NPY_STR_Y, - NPY_STR_M, - NPY_STR_W, - NPY_STR_D, - NPY_STR_h, - NPY_STR_m, - NPY_STR_s, - NPY_STR_ms, - NPY_STR_us, - NPY_STR_ns, - NPY_STR_ps, - NPY_STR_fs, - NPY_STR_as, - "generic" +static char *_datetime_strings[PANDAS_DATETIME_NUMUNITS] = { + "Y", + "M", + "W", + "D", + "B", + "h", + "m", + "s", + "ms", + "us", + "ns", + "ps", + "fs", + "as", }; /* * Wraps `localtime` functionality for multiple platforms. This @@ -170,8 +170,8 @@ get_gmtime(NPY_TIME_T *ts, struct tm *tms) * Returns 0 on success, -1 on failure. 
*/ static int -convert_datetimestruct_utc_to_local(npy_datetimestruct *out_dts_local, - const npy_datetimestruct *dts_utc, int *out_timezone_offset) +convert_datetimestruct_utc_to_local(pandas_datetimestruct *out_dts_local, + const pandas_datetimestruct *dts_utc, int *out_timezone_offset) { NPY_TIME_T rawtime = 0, localrawtime; struct tm tm_; @@ -197,7 +197,7 @@ convert_datetimestruct_utc_to_local(npy_datetimestruct *out_dts_local, /* * Convert everything in 'dts' to a time_t, to minutes precision. * This is POSIX time, which skips leap-seconds, but because - * we drop the seconds value from the npy_datetimestruct, everything + * we drop the seconds value from the pandas_datetimestruct, everything * is ok for this operation. */ rawtime = (time_t)get_datetimestruct_days(out_dts_local) * 24 * 60 * 60; @@ -236,8 +236,8 @@ convert_datetimestruct_utc_to_local(npy_datetimestruct *out_dts_local, * Returns 0 on success, -1 on failure. */ static int -convert_datetimestruct_local_to_utc(npy_datetimestruct *out_dts_utc, - const npy_datetimestruct *dts_local) +convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc, + const pandas_datetimestruct *dts_local) { npy_int64 year_correction = 0; @@ -306,11 +306,11 @@ convert_datetimestruct_local_to_utc(npy_datetimestruct *out_dts_utc, } /* int */ -/* parse_python_string(PyObject* obj, npy_datetimestruct *dts) { */ +/* parse_python_string(PyObject* obj, pandas_datetimestruct *dts) { */ /* PyObject *bytes = NULL; */ /* char *str = NULL; */ /* Py_ssize_t len = 0; */ -/* NPY_DATETIMEUNIT bestunit = -1; */ +/* PANDAS_DATETIMEUNIT bestunit = -1; */ /* /\* Convert to an ASCII string for the date parser *\/ */ /* if (PyUnicode_Check(obj)) { */ @@ -329,7 +329,7 @@ convert_datetimestruct_local_to_utc(npy_datetimestruct *out_dts_utc, /* } */ /* /\* Parse the ISO date *\/ */ -/* if (parse_iso_8601_datetime(str, len, NPY_FR_us, NPY_UNSAFE_CASTING, */ +/* if (parse_iso_8601_datetime(str, len, PANDAS_FR_us, NPY_UNSAFE_CASTING, */ /* dts, NULL, &bestunit, NULL) < 0) { */ /* Py_DECREF(bytes); */ /* return -1; */ @@ -377,20 +377,20 @@ convert_datetimestruct_local_to_utc(npy_datetimestruct *out_dts_utc, */ int parse_iso_8601_datetime(char *str, int len, - NPY_DATETIMEUNIT unit, + PANDAS_DATETIMEUNIT unit, NPY_CASTING casting, - npy_datetimestruct *out, + pandas_datetimestruct *out, npy_bool *out_local, - NPY_DATETIMEUNIT *out_bestunit, + PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special) { int year_leap = 0; int i, numdigits; char *substr, sublen; - NPY_DATETIMEUNIT bestunit; + PANDAS_DATETIMEUNIT bestunit; /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); + memset(out, 0, sizeof(pandas_datetimestruct)); out->month = 1; out->day = 1; @@ -420,7 +420,7 @@ parse_iso_8601_datetime(char *str, int len, out->month = tm_.tm_mon + 1; out->day = tm_.tm_mday; - bestunit = NPY_FR_D; + bestunit = PANDAS_FR_D; /* * Indicate that this was a special value, and @@ -454,15 +454,15 @@ parse_iso_8601_datetime(char *str, int len, tolower(str[1]) == 'o' && tolower(str[2]) == 'w') { NPY_TIME_T rawtime = 0; - PyArray_DatetimeMetaData meta; + pandas_datetime_metadata meta; time(&rawtime); /* Set up a dummy metadata for the conversion */ - meta.base = NPY_FR_s; + meta.base = PANDAS_FR_s; meta.num = 1; - bestunit = NPY_FR_s; + bestunit = PANDAS_FR_s; /* * Indicate that this was a special value, and @@ -536,7 +536,7 @@ parse_iso_8601_datetime(char *str, int len, if (out_local != NULL) { *out_local = 0; } - bestunit = NPY_FR_Y; + bestunit = 
PANDAS_FR_Y; goto finish; } else if (*substr == '-') { @@ -573,7 +573,7 @@ parse_iso_8601_datetime(char *str, int len, if (out_local != NULL) { *out_local = 0; } - bestunit = NPY_FR_M; + bestunit = PANDAS_FR_M; goto finish; } else if (*substr == '-') { @@ -611,7 +611,7 @@ parse_iso_8601_datetime(char *str, int len, if (out_local != NULL) { *out_local = 0; } - bestunit = NPY_FR_D; + bestunit = PANDAS_FR_D; goto finish; } else if (*substr != 'T' && *substr != ' ') { @@ -644,7 +644,7 @@ parse_iso_8601_datetime(char *str, int len, --sublen; } else { - bestunit = NPY_FR_h; + bestunit = PANDAS_FR_h; goto parse_timezone; } @@ -675,7 +675,7 @@ parse_iso_8601_datetime(char *str, int len, --sublen; } else { - bestunit = NPY_FR_m; + bestunit = PANDAS_FR_m; goto parse_timezone; } @@ -706,7 +706,7 @@ parse_iso_8601_datetime(char *str, int len, --sublen; } else { - bestunit = NPY_FR_s; + bestunit = PANDAS_FR_s; goto parse_timezone; } @@ -724,10 +724,10 @@ parse_iso_8601_datetime(char *str, int len, if (sublen == 0 || !isdigit(*substr)) { if (numdigits > 3) { - bestunit = NPY_FR_us; + bestunit = PANDAS_FR_us; } else { - bestunit = NPY_FR_ms; + bestunit = PANDAS_FR_ms; } goto parse_timezone; } @@ -746,10 +746,10 @@ parse_iso_8601_datetime(char *str, int len, if (sublen == 0 || !isdigit(*substr)) { if (numdigits > 3) { - bestunit = NPY_FR_ps; + bestunit = PANDAS_FR_ps; } else { - bestunit = NPY_FR_ns; + bestunit = PANDAS_FR_ns; } goto parse_timezone; } @@ -767,10 +767,10 @@ parse_iso_8601_datetime(char *str, int len, } if (numdigits > 3) { - bestunit = NPY_FR_as; + bestunit = PANDAS_FR_as; } else { - bestunit = NPY_FR_fs; + bestunit = PANDAS_FR_fs; } parse_timezone: @@ -911,54 +911,44 @@ parse_iso_8601_datetime(char *str, int len, * objects with the given local and unit settings. */ int -get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) +get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) { int len = 0; /* If no unit is provided, return the maximum length */ if (base == -1) { - return NPY_DATETIME_MAX_ISO8601_STRLEN; + return PANDAS_DATETIME_MAX_ISO8601_STRLEN; } switch (base) { /* Generic units can only be used to represent NaT */ - /*case NPY_FR_GENERIC:*/ + /*case PANDAS_FR_GENERIC:*/ /* return 4;*/ - case NPY_FR_as: + case PANDAS_FR_as: len += 3; /* "###" */ - break; - case NPY_FR_fs: + case PANDAS_FR_fs: len += 3; /* "###" */ - break; - case NPY_FR_ps: + case PANDAS_FR_ps: len += 3; /* "###" */ - break; - case NPY_FR_ns: + case PANDAS_FR_ns: len += 3; /* "###" */ - break; - case NPY_FR_us: + case PANDAS_FR_us: len += 3; /* "###" */ - break; - case NPY_FR_ms: + case PANDAS_FR_ms: len += 4; /* ".###" */ - break; - case NPY_FR_s: + case PANDAS_FR_s: len += 3; /* ":##" */ - break; - case NPY_FR_m: + case PANDAS_FR_m: len += 3; /* ":##" */ - break; - case NPY_FR_h: + case PANDAS_FR_h: len += 3; /* "T##" */ - break; - case NPY_FR_D: - case NPY_FR_W: + case PANDAS_FR_D: + case PANDAS_FR_B: + case PANDAS_FR_W: len += 3; /* "-##" */ - break; - case NPY_FR_M: + case PANDAS_FR_M: len += 3; /* "-##" */ - break; - case NPY_FR_Y: + case PANDAS_FR_Y: len += 21; /* 64-bit year */ break; default: @@ -966,7 +956,7 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) break; } - if (base >= NPY_FR_h) { + if (base >= PANDAS_FR_h) { if (local) { len += 5; /* "+####" or "-####" */ } @@ -984,49 +974,49 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) * Finds the largest unit whose value is nonzero, and for which * the remainder for the rest of the units is zero. 
*/ -static NPY_DATETIMEUNIT -lossless_unit_from_datetimestruct(npy_datetimestruct *dts) +static PANDAS_DATETIMEUNIT +lossless_unit_from_datetimestruct(pandas_datetimestruct *dts) { if (dts->as % 1000 != 0) { - return NPY_FR_as; + return PANDAS_FR_as; } else if (dts->as != 0) { - return NPY_FR_fs; + return PANDAS_FR_fs; } else if (dts->ps % 1000 != 0) { - return NPY_FR_ps; + return PANDAS_FR_ps; } else if (dts->ps != 0) { - return NPY_FR_ns; + return PANDAS_FR_ns; } else if (dts->us % 1000 != 0) { - return NPY_FR_us; + return PANDAS_FR_us; } else if (dts->us != 0) { - return NPY_FR_ms; + return PANDAS_FR_ms; } else if (dts->sec != 0) { - return NPY_FR_s; + return PANDAS_FR_s; } else if (dts->min != 0) { - return NPY_FR_m; + return PANDAS_FR_m; } else if (dts->hour != 0) { - return NPY_FR_h; + return PANDAS_FR_h; } else if (dts->day != 1) { - return NPY_FR_D; + return PANDAS_FR_D; } else if (dts->month != 1) { - return NPY_FR_M; + return PANDAS_FR_M; } else { - return NPY_FR_Y; + return PANDAS_FR_Y; } } /* - * Converts an npy_datetimestruct to an (almost) ISO 8601 + * Converts an pandas_datetimestruct to an (almost) ISO 8601 * NULL-terminated string. If the string fits in the space exactly, * it leaves out the NULL terminator and returns success. * @@ -1052,11 +1042,11 @@ lossless_unit_from_datetimestruct(npy_datetimestruct *dts) * string was too short). */ int -make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int local, NPY_DATETIMEUNIT base, int tzoffset, +make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, + int local, PANDAS_DATETIMEUNIT base, int tzoffset, NPY_CASTING casting) { - npy_datetimestruct dts_local; + pandas_datetimestruct dts_local; int timezone_offset = 0; char *substr = outstr, sublen = outlen; @@ -1074,12 +1064,12 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, * If there's a timezone, use at least minutes precision, * and never split up hours and minutes by default */ - if ((base < NPY_FR_m && local) || base == NPY_FR_h) { - base = NPY_FR_m; + if ((base < PANDAS_FR_m && local) || base == PANDAS_FR_h) { + base = PANDAS_FR_m; } /* Don't split up dates by default */ - else if (base < NPY_FR_D) { - base = NPY_FR_D; + else if (base < PANDAS_FR_D) { + base = PANDAS_FR_D; } } /* @@ -1088,8 +1078,8 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, * TODO: Could print weeks with YYYY-Www format if the week * epoch is a Monday. 
*/ - else if (base == NPY_FR_W) { - base = NPY_FR_D; + else if (base == PANDAS_FR_W) { + base = PANDAS_FR_D; } /* Use the C API to convert from UTC to local time */ @@ -1104,7 +1094,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* Use the manually provided tzoffset */ else if (local) { - /* Make a copy of the npy_datetimestruct we can modify */ + /* Make a copy of the pandas_datetimestruct we can modify */ dts_local = *dts; dts = &dts_local; @@ -1120,7 +1110,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, */ if (casting != NPY_UNSAFE_CASTING) { /* Producing a date as a local time is always 'unsafe' */ - if (base <= NPY_FR_D && local) { + if (base <= PANDAS_FR_D && local) { PyErr_SetString(PyExc_TypeError, "Cannot create a local " "timezone-based date string from a NumPy " "datetime without forcing 'unsafe' casting"); @@ -1128,7 +1118,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* Only 'unsafe' and 'same_kind' allow data loss */ else { - NPY_DATETIMEUNIT unitprec; + PANDAS_DATETIMEUNIT unitprec; unitprec = lossless_unit_from_datetimestruct(dts); if (casting != NPY_SAME_KIND_CASTING && unitprec > base) { @@ -1163,7 +1153,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= tmplen; /* Stop if the unit is years */ - if (base == NPY_FR_Y) { + if (base == PANDAS_FR_Y) { if (sublen > 0) { *substr = '\0'; } @@ -1187,7 +1177,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is months */ - if (base == NPY_FR_M) { + if (base == PANDAS_FR_M) { if (sublen > 0) { *substr = '\0'; } @@ -1211,7 +1201,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is days */ - if (base == NPY_FR_D) { + if (base == PANDAS_FR_D) { if (sublen > 0) { *substr = '\0'; } @@ -1235,7 +1225,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is hours */ - if (base == NPY_FR_h) { + if (base == PANDAS_FR_h) { goto add_time_zone; } @@ -1256,7 +1246,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is minutes */ - if (base == NPY_FR_m) { + if (base == PANDAS_FR_m) { goto add_time_zone; } @@ -1277,7 +1267,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is seconds */ - if (base == NPY_FR_s) { + if (base == PANDAS_FR_s) { goto add_time_zone; } @@ -1302,7 +1292,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 4; /* Stop if the unit is milliseconds */ - if (base == NPY_FR_ms) { + if (base == PANDAS_FR_ms) { goto add_time_zone; } @@ -1323,7 +1313,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is microseconds */ - if (base == NPY_FR_us) { + if (base == PANDAS_FR_us) { goto add_time_zone; } @@ -1344,7 +1334,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is nanoseconds */ - if (base == NPY_FR_ns) { + if (base == PANDAS_FR_ns) { goto add_time_zone; } @@ -1365,7 +1355,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is picoseconds */ - if (base == NPY_FR_ps) { + if (base == PANDAS_FR_ps) { goto add_time_zone; } @@ -1386,7 +1376,7 @@ make_iso_8601_datetime(npy_datetimestruct 
*dts, char *outstr, int outlen, sublen -= 3; /* Stop if the unit is femtoseconds */ - if (base == NPY_FR_fs) { + if (base == PANDAS_FR_fs) { goto add_time_zone; } diff --git a/pandas/src/np_datetime_strings.h b/pandas/src/np_datetime_strings.h index 0226d0aaccad6..9a2488fefaf56 100644 --- a/pandas/src/np_datetime_strings.h +++ b/pandas/src/np_datetime_strings.h @@ -42,11 +42,11 @@ */ int parse_iso_8601_datetime(char *str, int len, - NPY_DATETIMEUNIT unit, + PANDAS_DATETIMEUNIT unit, NPY_CASTING casting, - npy_datetimestruct *out, + pandas_datetimestruct *out, npy_bool *out_local, - NPY_DATETIMEUNIT *out_bestunit, + PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special); /* @@ -54,10 +54,10 @@ parse_iso_8601_datetime(char *str, int len, * objects with the given local and unit settings. */ int -get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); +get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base); /* - * Converts an npy_datetimestruct to an (almost) ISO 8601 + * Converts an pandas_datetimestruct to an (almost) ISO 8601 * NULL-terminated string. * * If 'local' is non-zero, it produces a string in local time with @@ -79,8 +79,8 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); * string was too short). */ int -make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int local, NPY_DATETIMEUNIT base, int tzoffset, +make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, + int local, PANDAS_DATETIMEUNIT base, int tzoffset, NPY_CASTING casting); #endif From 3d83387d8c408055e7d4071b7a051c9bbc45a7b1 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 7 May 2012 14:14:22 -0500 Subject: [PATCH 014/114] Use datetime64 with a 'us' unit explicitly, for 1.6 and 1.7 compatibility --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fde2ac81e56de..1e7b01605e812 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1013,7 +1013,7 @@ def form_blocks(data, axes): blocks.append(int_block) if len(datetime_dict): - datetime_block = _simple_blockify(datetime_dict, items, np.datetime64) + datetime_block = _simple_blockify(datetime_dict, items, np.dtype('M8[ms]')) blocks.append(datetime_block) if len(bool_dict): From c53e0938fa09b7254d80a60b0051bc77e8b72dcf Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 7 May 2012 17:46:59 -0500 Subject: [PATCH 015/114] Use an explicit unit for the 1.7 datetime64 scalar constructor --- pandas/tseries/index.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 8a77cde766a26..2cee089d788a4 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1228,7 +1228,10 @@ def _to_m8(key): # this also converts strings key = Timestamp(key) - return np.datetime64(lib.pydt_to_i8(key)) + if np.__version__[:3] == '1.6': + return np.datetime64(lib.pydt_to_i8(key)) + else: + return np.datetime64(lib.pydt_to_i8(key), 'us') def _to_m8_array(arr): From 89bd89833b99adf2b420218e8a2ac4329e824272 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 7 May 2012 17:47:39 -0500 Subject: [PATCH 016/114] Use assert_equal instead of assert, to see the actual values --- pandas/tests/test_groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f7aba1ecfd523..ebdd38d0937e3 100644 --- a/pandas/tests/test_groupby.py +++ 
b/pandas/tests/test_groupby.py @@ -18,6 +18,7 @@ import pandas.core.common as com import pandas.core.datetools as dt import numpy as np +from numpy.testing import assert_equal import pandas.util.testing as tm @@ -484,7 +485,7 @@ def test_series_agg_multi_pure_python(self): 'F' : np.random.randn(11)}) def bad(x): - assert(len(x.base) == len(x)) + assert_equal(len(x.base), len(x)) return 'foo' result = data.groupby(['A', 'B']).agg(bad) From 4e6720fb27933f7b5300e05c658de9a608f086fb Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Tue, 8 May 2012 09:43:13 -0500 Subject: [PATCH 017/114] Microseconds (us) not milliseconds (ms) --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1e7b01605e812..efc6d38bf9de2 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1013,7 +1013,7 @@ def form_blocks(data, axes): blocks.append(int_block) if len(datetime_dict): - datetime_block = _simple_blockify(datetime_dict, items, np.dtype('M8[ms]')) + datetime_block = _simple_blockify(datetime_dict, items, np.dtype('M8[us]')) blocks.append(datetime_block) if len(bool_dict): From a7bccd867ee1e7b17b4e4fd5d0b3b242acd27ae5 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 14:05:03 -0400 Subject: [PATCH 018/114] TST: use NaT value --- pandas/src/datetime.pxd | 16 ++++++++++------ pandas/src/datetime.pyx | 5 ++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index ed56029b6ef0f..213f29c5e2605 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -1,4 +1,4 @@ -from numpy cimport int64_t +from numpy cimport int64_t, int32_t, npy_int64, npy_int32 from cpython cimport PyObject @@ -75,15 +75,19 @@ cdef extern from "np_datetime.h": PANDAS_FR_as ctypedef struct pandas_datetimestruct: - int64_t year - int month, day, hour, min, sec, us, ps, as + npy_int64 year + npy_int32 month, day, hour, min, sec, us, ps, as - int convert_pydatetime_to_datetimestruct(PyObject *obj, pandas_datetimestruct *out, + int convert_pydatetime_to_datetimestruct(PyObject *obj, + pandas_datetimestruct *out, PANDAS_DATETIMEUNIT *out_bestunit, int apply_tzinfo) - npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) - void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) + npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *d) + void pandas_datetime_to_datetimestruct(npy_datetime val, + PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *result) int _days_per_month_table[2][12] int dayofweek(int y, int m, int d) diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 5988179eb4371..4627e0bd8facd 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -737,19 +737,18 @@ def string_to_datetime(ndarray[object] strings, raise_=False, dayfirst=False): from dateutil.parser import parse - try: result = np.empty(n, dtype='M8[us]') iresult = result.view('i8') for i in range(n): val = strings[i] if util._checknull(val): - result[i] = 'NaT' + iresult[i] = NaT elif PyDateTime_Check(val): result[i] = val else: if len(val) == 0: - result[i] = 'NaT' + iresult[i] = NaT continue try: result[i] = parse(val, dayfirst=dayfirst) From b98e4e0a5350e26bbe46e84f0ea455611379cce9 Mon Sep 17 00:00:00 2001 From: Adam Klein Date: Tue, 10 Apr 2012 14:57:31 -0400 Subject: [PATCH 019/114] ENH: #1020 
implementation. needs tests and adding to API --- pandas/src/moments.pyx | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/pandas/src/moments.pyx b/pandas/src/moments.pyx index 6bf644cf9ac78..0e9c05de1abb4 100644 --- a/pandas/src/moments.pyx +++ b/pandas/src/moments.pyx @@ -72,6 +72,45 @@ def median(ndarray arr): return (kth_smallest(arr, n / 2) + kth_smallest(arr, n / 2 - 1)) / 2 +# -------------- Min, Max subsequence + +def max_subseq(ndarray[double_t] arr): + cdef: + Py_ssize_t i=0,s=0,e=0,T,n + double m, S + + n = len(arr) + + if len(arr) == 0: + return (-1,-1,None) + + m = arr[0] + S = m + T = 0 + + for i in range(1, n): + # S = max { S + A[i], A[i] ) + if (S > 0): + S = S + arr[i] + else: + S = arr[i] + T = i + if S > m: + s = T + e = i + m = S + + return (s, e, m) + +def min_subseq(ndarray[double_t] arr): + cdef: + Py_ssize_t s, e + double m + + (s, e, m) = max_subseq(-arr) + + return (s, e, -m) + #------------------------------------------------------------------------------- # Rolling sum From 1ecb5c463366435ca53672c2e1940013633d4e37 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 14:30:24 -0400 Subject: [PATCH 020/114] ENH: add docs and add match function to API, close #502 --- RELEASE.rst | 1 + pandas/core/algorithms.py | 30 +++++++++++++++++++++++------- pandas/core/api.py | 2 ++ pandas/tests/test_algos.py | 25 +++++++++++++++++++++++++ vb_suite/miscellaneous.py | 12 ++++++++++++ 5 files changed, 63 insertions(+), 7 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index bcd41813fb91f..c93c6c9fa357f 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -38,6 +38,7 @@ pandas 0.8.0 - Add support for indexes (dates or otherwise) with duplicates and common sense indexing/selection functionality - Series/DataFrame.update methods, in-place variant of combine_first (#961) + - Add ``match`` function to API (#502) **Improvements to existing features** diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e7126fd489e9d..f9315d63c5865 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -8,29 +8,45 @@ import pandas.core.common as com import pandas._tseries as lib -def match(values, index): +def match(to_match, values, na_sentinel=-1): """ - + Compute locations of to_match into values Parameters ---------- + to_match : array-like + values to find positions of + values : array-like + Unique set of values + na_sentinel : int, default -1 + Value to mark "not found" + + Examples + -------- Returns ------- - match : ndarray + match : ndarray of integers """ - f = lambda htype, caster: _match_generic(values, index, htype, caster) - return _hashtable_algo(f, index.dtype) + values = np.asarray(values) + if issubclass(values.dtype.type, basestring): + values = np.array(values, dtype='O') + + f = lambda htype, caster: _match_generic(to_match, values, htype, caster) + return _hashtable_algo(f, values.dtype) def unique(values): """ + Compute unique values (not necessarily sorted) efficiently from input array + of values Parameters ---------- + values : array-like Returns ------- - + uniques """ f = lambda htype, caster: _unique_generic(values, htype, caster) return _hashtable_algo(f, values.dtype) @@ -98,7 +114,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1): labels, counts = table.get_labels(values, uniques, 0, na_sentinel) labels = com._ensure_platform_int(labels) - + uniques = com._asarray_tuplesafe(uniques) if sort and len(counts) > 0: sorter = uniques.argsort() diff --git 
a/pandas/core/api.py b/pandas/core/api.py index 41721c483a5b3..6a986f4842f43 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -3,6 +3,8 @@ import numpy as np +from pandas.core.algorithms import factorize, match, unique + from pandas.core.common import isnull, notnull, save, load from pandas.core.factor import Factor from pandas.core.format import set_printoptions diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 06b0a6798e9b1..a64b880c3478e 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2,6 +2,31 @@ import numpy as np + import pandas.core.algorithms as algos import pandas.util.testing as tm + +class TestMatch(unittest.TestCase): + + def test_ints(self): + values = np.array([0, 2, 1]) + to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0]) + + result = algos.match(to_match, values) + expected = np.array([0, 2, 1, 1, 0, 2, -1, 0]) + self.assert_(np.array_equal(result, expected)) + + def test_strings(self): + values = ['foo', 'bar', 'baz'] + to_match = ['bar', 'foo', 'qux', 'foo', 'bar', 'baz', 'qux'] + + result = algos.match(to_match, values) + expected = np.array([1, 0, -1, 0, 1, 2, -1]) + self.assert_(np.array_equal(result, expected)) + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], + exit=False) + diff --git a/vb_suite/miscellaneous.py b/vb_suite/miscellaneous.py index 8295d275f2dd6..eeeaf01a8b4af 100644 --- a/vb_suite/miscellaneous.py +++ b/vb_suite/miscellaneous.py @@ -20,3 +20,15 @@ def prop(self): misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly", ncalls=2000000) +#---------------------------------------------------------------------- +# match + +setup = common_setup + """ +from pandas.util.testing import rands + +uniques = np.array([rands(10) for _ in xrange(1000)], dtype='O') +all = uniques.repeat(10) +""" + +match_strings = Benchmark("match(all, uniques)", setup, + start_date=datetime(2012, 5, 12)) From 4ac9abb0bfc80c715ae3d6ed67c15a995b7078da Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 15:23:10 -0400 Subject: [PATCH 021/114] ENH: add Cython nth/last functions, vbenchmarks. 
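
For quick reference, a usage sketch of the `match` function added to the API in patch 020 above, mirroring the cases exercised in pandas/tests/test_algos.py (this assumes the `pandas.core.algorithms` API exactly as added in that patch):

    import numpy as np
    from pandas.core.algorithms import match

    values = np.array([0, 2, 1])                     # unique values to search in
    to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0])    # values whose positions we want
    match(to_match, values)
    # -> array([ 0,  2,  1,  1,  0,  2, -1,  0]); -1 is the na_sentinel for "not found"

    match(['bar', 'foo', 'qux'], ['foo', 'bar', 'baz'])
    # -> array([ 1,  0, -1])
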
close #1043 --- RELEASE.rst | 2 + pandas/core/groupby.py | 32 ++++-- pandas/src/groupby.pyx | 183 +++++++++++++++++++++++++++++++++++ pandas/tests/test_groupby.py | 22 ++--- vb_suite/groupby.py | 17 ++++ 5 files changed, 238 insertions(+), 18 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index c93c6c9fa357f..32c3844810eb8 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -39,6 +39,8 @@ pandas 0.8.0 sense indexing/selection functionality - Series/DataFrame.update methods, in-place variant of combine_first (#961) - Add ``match`` function to API (#502) + - Add Cython-optimized first, last, min, max, prod functions to GroupBy (#994, + #1043) **Improvements to existing features** diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 0c1e580c5bbc4..58c75479e2004 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -30,6 +30,20 @@ def f(self): return f +def _first_compat(x, axis=0): + x = np.asarray(x) + x = x[com.notnull(x)] + if len(x) == 0: + return np.nan + return x[0] + +def _last_compat(x, axis=0): + x = np.asarray(x) + x = x[com.notnull(x)] + if len(x) == 0: + return np.nan + return x[-1] + class GroupBy(object): """ @@ -314,6 +328,8 @@ def size(self): prod = _groupby_function('prod', 'prod', np.prod) min = _groupby_function('min', 'min', np.min) max = _groupby_function('max', 'max', np.max) + first = _groupby_function('first', 'first', _first_compat) + last = _groupby_function('last', 'last', _last_compat) def ohlc(self): """ @@ -323,11 +339,11 @@ def ohlc(self): """ return self._cython_agg_general('ohlc') - def last(self): - return self.nth(-1) + # def last(self): + # return self.nth(-1) - def first(self): - return self.nth(0) + # def first(self): + # return self.nth(0) def nth(self, n): def picker(arr): @@ -621,7 +637,9 @@ def get_group_levels(self): 'max' : lib.group_max, 'mean' : lib.group_mean, 'var' : lib.group_var, - 'std' : lib.group_var + 'std' : lib.group_var, + 'first': lambda a, b, c, d: lib.group_nth(a, b, c, d, 1), + 'last': lib.group_last } _cython_transforms = { @@ -858,7 +876,9 @@ def names(self): 'max' : lib.group_max_bin, 'var' : lib.group_var_bin, 'std' : lib.group_var_bin, - 'ohlc' : lib.group_ohlc + 'ohlc' : lib.group_ohlc, + 'first': lambda a, b, c, d: lib.group_nth_bin(a, b, c, d, 1), + 'last': lib.group_last_bin } _name_functions = { diff --git a/pandas/src/groupby.pyx b/pandas/src/groupby.pyx index 049f70b5f8237..48a71f4d1d51f 100644 --- a/pandas/src/groupby.pyx +++ b/pandas/src/groupby.pyx @@ -330,6 +330,188 @@ def group_prod(ndarray[float64_t, ndim=2] out, else: out[i, j] = prodx[i, j] +#---------------------------------------------------------------------- +# first, nth, last + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_nth(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels, int64_t rank): + ''' + Only aggregates on axis=0 + ''' + cdef: + Py_ssize_t i, j, N, K, lab + float64_t val, count + ndarray[float64_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty_like(out) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) 
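
To make the intent of the new GroupBy aggregations in patch 021 concrete, a small sketch of how `first`, `last` and `nth` are expected to behave (hypothetical data; `first`/`last` skip missing values, as in the `_first_compat`/`_last_compat` helpers above):

    import numpy as np
    from pandas import DataFrame

    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar'],
                    'B': [np.nan, 1.0, 2.0, 3.0]})
    grouped = df.groupby('A')

    grouped.first()   # first non-null 'B' per group: bar -> 1.0, foo -> 2.0
    grouped.last()    # last non-null 'B' per group:  bar -> 3.0, foo -> 2.0
    grouped.nth(1)    # second row of each group
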
+@cython.wraparound(False) +def group_nth_bin(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins, int64_t rank): + ''' + Only aggregates on axis=0 + ''' + cdef: + Py_ssize_t i, j, N, K, ngroups, b + float64_t val, count + ndarray[float64_t, ndim=2] resx, nobs + + nobs = np.zeros_like(out) + resx = np.empty_like(out) + + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 + + N, K = ( values).shape + + b = 0 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + if nobs[b, j] == rank: + resx[b, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels): + ''' + Only aggregates on axis=0 + ''' + cdef: + Py_ssize_t i, j, N, K, lab + float64_t val, count + ndarray[float64_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty_like(out) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last_bin(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] bins): + ''' + Only aggregates on axis=0 + ''' + cdef: + Py_ssize_t i, j, N, K, ngroups, b + float64_t val, count + ndarray[float64_t, ndim=2] resx, nobs + + nobs = np.zeros_like(out) + resx = np.empty_like(out) + + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 + + N, K = ( values).shape + + b = 0 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + resx[b, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + +#---------------------------------------------------------------------- +# group_min, group_max + @cython.boundscheck(False) @cython.wraparound(False) @@ -787,6 +969,7 @@ def group_min_bin(ndarray[float64_t, ndim=2] out, else: out[i, j] = minx[i, j] + @cython.boundscheck(False) @cython.wraparound(False) def group_max_bin(ndarray[float64_t, ndim=2] out, diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index ebdd38d0937e3..240c86bf9df4a 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -121,25 +121,23 @@ def test_basic(self): # corner cases self.assertRaises(Exception, grouped.aggregate, lambda x: x * 2) + def test_first_last_nth(self): # tests for first / last / nth grouped = self.df.groupby('A') first = grouped.first() - expected = grouped.get_group('bar') - expected = expected.xs(expected.index[0])[1:] - expected.name ='bar' - assert_series_equal(first.xs('bar'), expected) + expected = self.df.ix[[1, 0], ['C', 'D']] + expected.index = ['bar', 
'foo'] + assert_frame_equal(first, expected) last = grouped.last() - expected = grouped.get_group('bar') - expected = expected.xs(expected.index[-1])[1:] - expected.name ='bar' - assert_series_equal(last.xs('bar'), expected) + expected = self.df.ix[[5, 7], ['C', 'D']] + expected.index = ['bar', 'foo'] + assert_frame_equal(last, expected) nth = grouped.nth(1) - expected = grouped.get_group('foo') - expected = expected.xs(expected.index[1])[1:] - expected.name ='foo' - assert_series_equal(nth.xs('foo'), expected) + expected = self.df.ix[[3, 2], ['B', 'C', 'D']] + expected.index = ['bar', 'foo'] + assert_frame_equal(nth, expected) def test_empty_groups(self): # GH # 1048 diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index f5d8ababfc17f..dfffdf61d3df7 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -155,3 +155,20 @@ def f(): groupby_apply_dict_return = Benchmark('data.groupby(labels).apply(f)', setup, start_date=datetime(2011, 12, 15)) + +#---------------------------------------------------------------------- +# First / last functions + +setup = common_setup + """ +labels = np.arange(10000).repeat(10) +data = Series(randn(len(labels))) +data[::3] = np.nan +data[1::3] = np.nan +labels = labels.take(np.random.permutation(len(labels))) +""" + +groupby_first = Benchmark('data.groupby(labels).first()', setup, + start_date=datetime(2012, 5, 1)) + +groupby_last = Benchmark('data.groupby(labels).last()', setup, + start_date=datetime(2012, 5, 1)) From b246ae10bf1a346050ee9e745ac0519fe89fea6e Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 15:41:47 -0400 Subject: [PATCH 022/114] BUG: fix improper quarter parsing for frequencies other than Q-DEC, close #1228 --- pandas/tseries/frequencies.py | 2 + pandas/tseries/tests/test_period.py | 293 ++++++++++++++-------------- pandas/tseries/tools.py | 14 +- 3 files changed, 162 insertions(+), 147 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 705d66d84f4bf..fe198b10132ec 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -951,6 +951,8 @@ def is_superperiod(source, target): return target not in ['D', 'B', 'H', 'T', 'S'] def _get_rule_month(source, default='DEC'): + if isinstance(source, offsets.DateOffset): + source = source.rule_code source = source.upper() if '-' not in source: return default diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 1842a6f9bbbf0..1e897eb73c284 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -36,6 +36,7 @@ def test_period_cons_quarterly(self): for month in MONTHS: freq = 'Q-%s' % month exp = Period('1989Q3', freq=freq) + self.assert_('1989Q3' in str(exp)) stamp = exp.to_timestamp('D', how='end') p = Period(stamp, freq=freq) self.assertEquals(p, exp) @@ -1058,29 +1059,29 @@ def test_index_duplicate_periods(self): assert_series_equal(result, expected) def test_constructor(self): - ii = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - assert_equal(len(ii), 9) + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + assert_equal(len(pi), 9) - ii = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - assert_equal(len(ii), 4 * 9) + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') + assert_equal(len(pi), 4 * 9) - ii = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - assert_equal(len(ii), 12 * 9) + pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + assert_equal(len(pi), 12 * 9) - 
ii = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') - assert_equal(len(ii), 365 * 9 + 2) + pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') + assert_equal(len(pi), 365 * 9 + 2) - ii = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009') - assert_equal(len(ii), 261 * 9) + pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009') + assert_equal(len(pi), 261 * 9) - ii = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') - assert_equal(len(ii), 365 * 24) + pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') + assert_equal(len(pi), 365 * 24) - ii = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') - assert_equal(len(ii), 24 * 60) + pi = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') + assert_equal(len(pi), 24 * 60) - ii = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') - assert_equal(len(ii), 24 * 60 * 60) + pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') + assert_equal(len(pi), 24 * 60 * 60) start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) @@ -1137,96 +1138,96 @@ def test_constructor(self): self.assertRaises(ValueError, PeriodIndex, vals) def test_shift(self): - ii1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - ii2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') - assert_equal(len(ii1), len(ii2)) - assert_equal(ii1.shift(1).values, ii2.values) - - ii1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - ii2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') - assert_equal(len(ii1), len(ii2)) - assert_equal(ii1.shift(-1).values, ii2.values) - - ii1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - ii2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') - assert_equal(len(ii1), len(ii2)) - assert_equal(ii1.shift(1).values, ii2.values) - - ii1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - ii2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') - assert_equal(len(ii1), len(ii2)) - assert_equal(ii1.shift(-1).values, ii2.values) - - ii1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - ii2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') - assert_equal(len(ii1), len(ii2)) - assert_equal(ii1.shift(1).values, ii2.values) - - ii1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - ii2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') - assert_equal(len(ii1), len(ii2)) - assert_equal(ii1.shift(-1).values, ii2.values) + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') + assert_equal(len(pi1), len(pi2)) + assert_equal(pi1.shift(1).values, pi2.values) + + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') + assert_equal(len(pi1), len(pi2)) + assert_equal(pi1.shift(-1).values, pi2.values) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') + assert_equal(len(pi1), len(pi2)) + assert_equal(pi1.shift(1).values, pi2.values) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') + assert_equal(len(pi1), len(pi2)) + assert_equal(pi1.shift(-1).values, pi2.values) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') + assert_equal(len(pi1), 
len(pi2)) + assert_equal(pi1.shift(1).values, pi2.values) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') + assert_equal(len(pi1), len(pi2)) + assert_equal(pi1.shift(-1).values, pi2.values) def test_asfreq(self): - ii1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') - ii2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') - ii3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') - ii4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') - ii5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') - ii6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') - ii7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') - - self.assertEquals(ii1.asfreq('Q', 'S'), ii2) - self.assertEquals(ii1.asfreq('Q', 's'), ii2) - self.assertEquals(ii1.asfreq('M', 'start'), ii3) - self.assertEquals(ii1.asfreq('D', 'StarT'), ii4) - self.assertEquals(ii1.asfreq('H', 'beGIN'), ii5) - self.assertEquals(ii1.asfreq('Min', 'S'), ii6) - self.assertEquals(ii1.asfreq('S', 'S'), ii7) - - self.assertEquals(ii2.asfreq('A', 'S'), ii1) - self.assertEquals(ii2.asfreq('M', 'S'), ii3) - self.assertEquals(ii2.asfreq('D', 'S'), ii4) - self.assertEquals(ii2.asfreq('H', 'S'), ii5) - self.assertEquals(ii2.asfreq('Min', 'S'), ii6) - self.assertEquals(ii2.asfreq('S', 'S'), ii7) - - self.assertEquals(ii3.asfreq('A', 'S'), ii1) - self.assertEquals(ii3.asfreq('Q', 'S'), ii2) - self.assertEquals(ii3.asfreq('D', 'S'), ii4) - self.assertEquals(ii3.asfreq('H', 'S'), ii5) - self.assertEquals(ii3.asfreq('Min', 'S'), ii6) - self.assertEquals(ii3.asfreq('S', 'S'), ii7) - - self.assertEquals(ii4.asfreq('A', 'S'), ii1) - self.assertEquals(ii4.asfreq('Q', 'S'), ii2) - self.assertEquals(ii4.asfreq('M', 'S'), ii3) - self.assertEquals(ii4.asfreq('H', 'S'), ii5) - self.assertEquals(ii4.asfreq('Min', 'S'), ii6) - self.assertEquals(ii4.asfreq('S', 'S'), ii7) - - self.assertEquals(ii5.asfreq('A', 'S'), ii1) - self.assertEquals(ii5.asfreq('Q', 'S'), ii2) - self.assertEquals(ii5.asfreq('M', 'S'), ii3) - self.assertEquals(ii5.asfreq('D', 'S'), ii4) - self.assertEquals(ii5.asfreq('Min', 'S'), ii6) - self.assertEquals(ii5.asfreq('S', 'S'), ii7) - - self.assertEquals(ii6.asfreq('A', 'S'), ii1) - self.assertEquals(ii6.asfreq('Q', 'S'), ii2) - self.assertEquals(ii6.asfreq('M', 'S'), ii3) - self.assertEquals(ii6.asfreq('D', 'S'), ii4) - self.assertEquals(ii6.asfreq('H', 'S'), ii5) - self.assertEquals(ii6.asfreq('S', 'S'), ii7) - - self.assertEquals(ii7.asfreq('A', 'S'), ii1) - self.assertEquals(ii7.asfreq('Q', 'S'), ii2) - self.assertEquals(ii7.asfreq('M', 'S'), ii3) - self.assertEquals(ii7.asfreq('D', 'S'), ii4) - self.assertEquals(ii7.asfreq('H', 'S'), ii5) - self.assertEquals(ii7.asfreq('Min', 'S'), ii6) - - #self.assertEquals(ii7.asfreq('A', 'E'), i_end) + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') + pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') + pi3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') + pi4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') + pi5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') + pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') + pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') + + self.assertEquals(pi1.asfreq('Q', 'S'), pi2) + self.assertEquals(pi1.asfreq('Q', 's'), pi2) + self.assertEquals(pi1.asfreq('M', 'start'), pi3) + self.assertEquals(pi1.asfreq('D', 'StarT'), pi4) + 
self.assertEquals(pi1.asfreq('H', 'beGIN'), pi5) + self.assertEquals(pi1.asfreq('Min', 'S'), pi6) + self.assertEquals(pi1.asfreq('S', 'S'), pi7) + + self.assertEquals(pi2.asfreq('A', 'S'), pi1) + self.assertEquals(pi2.asfreq('M', 'S'), pi3) + self.assertEquals(pi2.asfreq('D', 'S'), pi4) + self.assertEquals(pi2.asfreq('H', 'S'), pi5) + self.assertEquals(pi2.asfreq('Min', 'S'), pi6) + self.assertEquals(pi2.asfreq('S', 'S'), pi7) + + self.assertEquals(pi3.asfreq('A', 'S'), pi1) + self.assertEquals(pi3.asfreq('Q', 'S'), pi2) + self.assertEquals(pi3.asfreq('D', 'S'), pi4) + self.assertEquals(pi3.asfreq('H', 'S'), pi5) + self.assertEquals(pi3.asfreq('Min', 'S'), pi6) + self.assertEquals(pi3.asfreq('S', 'S'), pi7) + + self.assertEquals(pi4.asfreq('A', 'S'), pi1) + self.assertEquals(pi4.asfreq('Q', 'S'), pi2) + self.assertEquals(pi4.asfreq('M', 'S'), pi3) + self.assertEquals(pi4.asfreq('H', 'S'), pi5) + self.assertEquals(pi4.asfreq('Min', 'S'), pi6) + self.assertEquals(pi4.asfreq('S', 'S'), pi7) + + self.assertEquals(pi5.asfreq('A', 'S'), pi1) + self.assertEquals(pi5.asfreq('Q', 'S'), pi2) + self.assertEquals(pi5.asfreq('M', 'S'), pi3) + self.assertEquals(pi5.asfreq('D', 'S'), pi4) + self.assertEquals(pi5.asfreq('Min', 'S'), pi6) + self.assertEquals(pi5.asfreq('S', 'S'), pi7) + + self.assertEquals(pi6.asfreq('A', 'S'), pi1) + self.assertEquals(pi6.asfreq('Q', 'S'), pi2) + self.assertEquals(pi6.asfreq('M', 'S'), pi3) + self.assertEquals(pi6.asfreq('D', 'S'), pi4) + self.assertEquals(pi6.asfreq('H', 'S'), pi5) + self.assertEquals(pi6.asfreq('S', 'S'), pi7) + + self.assertEquals(pi7.asfreq('A', 'S'), pi1) + self.assertEquals(pi7.asfreq('Q', 'S'), pi2) + self.assertEquals(pi7.asfreq('M', 'S'), pi3) + self.assertEquals(pi7.asfreq('D', 'S'), pi4) + self.assertEquals(pi7.asfreq('H', 'S'), pi5) + self.assertEquals(pi7.asfreq('Min', 'S'), pi6) + + #self.assertEquals(pi7.asfreq('A', 'E'), i_end) def test_ts_repr(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') @@ -1258,18 +1259,18 @@ def test_badinput(self): def test_dti_to_period(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - ii1 = dti.to_period() - ii2 = dti.to_period(freq='D') + pi1 = dti.to_period() + pi2 = dti.to_period(freq='D') - self.assertEquals(ii1[0], Period('Jan 2005', freq='M')) - self.assertEquals(ii2[0], Period('1/31/2005', freq='D')) + self.assertEquals(pi1[0], Period('Jan 2005', freq='M')) + self.assertEquals(pi2[0], Period('1/31/2005', freq='D')) - self.assertEquals(ii1[-1], Period('Nov 2005', freq='M')) - self.assertEquals(ii2[-1], Period('11/30/2005', freq='D')) + self.assertEquals(pi1[-1], Period('Nov 2005', freq='M')) + self.assertEquals(pi2[-1], Period('11/30/2005', freq='D')) - def test_iindex_slice_index(self): - ii = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') - s = Series(np.random.rand(len(ii)), index=ii) + def test_pindex_slice_index(self): + pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') + s = Series(np.random.rand(len(pi)), index=pi) res = s['2010'] exp = s[0:12] assert_series_equal(res, exp) @@ -1277,20 +1278,20 @@ def test_iindex_slice_index(self): exp = s[12:24] assert_series_equal(res, exp) - def test_iindex_qaccess(self): - ii = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') - s = Series(np.random.rand(len(ii)), index=ii).cumsum() + def test_pindex_qaccess(self): + pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') + s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! 
self.assert_(s['05Q4'] == s[2]) def test_period_dt64_round_trip(self): dti = date_range('1/1/2000', '1/7/2002', freq='B') - ii = dti.to_period() - self.assert_(ii.to_timestamp().equals(dti)) + pi = dti.to_period() + self.assert_(pi.to_timestamp().equals(dti)) dti = date_range('1/1/2000', '1/7/2002', freq='B') - ii = dti.to_period(freq='H') - self.assert_(ii.to_timestamp().equals(dti)) + pi = dti.to_period(freq='H') + self.assert_(pi.to_timestamp().equals(dti)) def test_to_period_quarterly(self): # make sure we can make the round trip @@ -1309,19 +1310,19 @@ def test_no_multiples(self): freq='2A') self.assertRaises(ValueError, Period, '1989', freq='2A') - # def test_iindex_multiples(self): - # ii = PeriodIndex(start='1/1/10', end='12/31/12', freq='2M') - # self.assertEquals(ii[0], Period('1/1/10', '2M')) - # self.assertEquals(ii[1], Period('3/1/10', '2M')) + # def test_pindex_multiples(self): + # pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='2M') + # self.assertEquals(pi[0], Period('1/1/10', '2M')) + # self.assertEquals(pi[1], Period('3/1/10', '2M')) - # self.assertEquals(ii[0].asfreq('6M'), ii[2].asfreq('6M')) - # self.assertEquals(ii[0].asfreq('A'), ii[2].asfreq('A')) + # self.assertEquals(pi[0].asfreq('6M'), pi[2].asfreq('6M')) + # self.assertEquals(pi[0].asfreq('A'), pi[2].asfreq('A')) - # self.assertEquals(ii[0].asfreq('M', how='S'), + # self.assertEquals(pi[0].asfreq('M', how='S'), # Period('Jan 2010', '1M')) - # self.assertEquals(ii[0].asfreq('M', how='E'), + # self.assertEquals(pi[0].asfreq('M', how='E'), # Period('Feb 2010', '1M')) - # self.assertEquals(ii[1].asfreq('M', how='S'), + # self.assertEquals(pi[1].asfreq('M', how='S'), # Period('Mar 2010', '1M')) # i = Period('1/1/2010 12:05:18', '5S') @@ -1424,33 +1425,33 @@ def test_fields(self): # year, month, day, hour, minute # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter # qyear - ii = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self._check_all_fields(ii) + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + self._check_all_fields(pi) - ii = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2003') - self._check_all_fields(ii) + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2003') + self._check_all_fields(pi) - ii = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002') - self._check_all_fields(ii) + pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002') + self._check_all_fields(pi) - ii = PeriodIndex(freq='D', start='12/1/2001', end='1/1/2002') - self._check_all_fields(ii) + pi = PeriodIndex(freq='D', start='12/1/2001', end='1/1/2002') + self._check_all_fields(pi) - ii = PeriodIndex(freq='B', start='12/1/2001', end='1/1/2002') - self._check_all_fields(ii) + pi = PeriodIndex(freq='B', start='12/1/2001', end='1/1/2002') + self._check_all_fields(pi) - ii = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00') - self._check_all_fields(ii) + pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00') + self._check_all_fields(pi) - ii = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:59') - self._check_all_fields(ii) + pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:59') + self._check_all_fields(pi) - ii = PeriodIndex(freq='S', start='12/31/2001', end='1/1/2001 00:00:01') - self._check_all_fields(ii) + pi = PeriodIndex(freq='S', start='12/31/2001', end='1/1/2001 00:00:01') + self._check_all_fields(pi) end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) - self._check_all_fields(ii) + 
self._check_all_fields(pi) def _check_all_fields(self, periodindex): fields = ['year', 'month', 'day', 'hour', 'minute', diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index bfeec9e8081da..d7a296df8655b 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -135,6 +135,8 @@ def parse_time_string(arg, freq=None): """ from pandas.core.format import print_config from pandas.tseries.offsets import DateOffset + from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, + _get_freq_str) if not isinstance(arg, basestring): return arg @@ -165,7 +167,17 @@ def parse_time_string(arg, freq=None): y = int(y_str) if add_century: y += 2000 - ret = default.replace(year=y, month=(q-1)*3+1) + + if freq is not None: + # hack attack, #1228 + mnum = _month_numbers[_get_rule_month(freq)] + 1 + month = (mnum + (q - 1) * 3) % 12 + 1 + if month > mnum: + y -= 1 + else: + month = (q - 1) * 3 + 1 + + ret = default.replace(year=y, month=month) return ret, ret, 'quarter' is_mo_str = freq is not None and freq == 'M' From 4d052f9add3f2023c5fd7065ca7289b6255e391d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 15:46:33 -0400 Subject: [PATCH 023/114] BUG: implement Series.repeat to get expected results, close #1229 --- pandas/core/series.py | 8 ++++++++ pandas/tests/test_series.py | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0ca78e3d2236e..aff454220f8b6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -609,6 +609,14 @@ def astype(self, dtype): casted = com._astype_nansafe(self.values, dtype) return self._constructor(casted, index=self.index, name=self.name) + def repeat(self, reps): + """ + See ndarray.repeat + """ + new_index = self.index.repeat(reps) + new_values = self.values.repeat(reps) + return Series(new_values, index=new_index, name=self.name) + def reshape(self, newshape, order='C'): """ See numpy.ndarray.reshape diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f905834473012..4b8248dcc7bcd 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2701,6 +2701,19 @@ def test_timeseries_coercion(self): self.assert_(isinstance(ser, TimeSeries)) self.assert_(isinstance(ser.index, DatetimeIndex)) + def test_repeat(self): + s = Series(np.random.randn(3), index=['a', 'b', 'c']) + + reps = s.repeat(5) + exp = Series(s.values.repeat(5), index=s.index.values.repeat(5)) + assert_series_equal(reps, exp) + + to_rep = [2, 3, 4] + reps = s.repeat(to_rep) + exp = Series(s.values.repeat(to_rep), + index=s.index.values.repeat(to_rep)) + assert_series_equal(reps, exp) + if __name__ == '__main__': nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], exit=False) From 74a6be08a89587b0d912ebee9b1f2d0f4edd7c44 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 17:28:06 -0400 Subject: [PATCH 024/114] ENH: anchor resampling frequencies like 5minute that evenly subdivide one day in resampling to always get regular intervals. 
a bit more testing needed, but close #1165 --- pandas/core/frame.py | 1 - pandas/core/internals.py | 3 +- pandas/tseries/index.py | 2 +- pandas/tseries/offsets.py | 3 +- pandas/tseries/resample.py | 49 +++++++++++++++++++++++++++ pandas/tseries/tests/test_resample.py | 25 ++++++++++++++ pandas/tseries/tests/test_util.py | 16 ++++----- pandas/tseries/util.py | 2 +- 8 files changed, 88 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 36202948e9a78..2694e9f3e484a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -32,7 +32,6 @@ from pandas.core.internals import BlockManager, make_block, form_blocks from pandas.core.series import Series, _radd_compat from pandas.compat.scipy import scoreatpercentile as _quantile -from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex from pandas.util import py3compat from pandas.util.terminal import get_terminal_size diff --git a/pandas/core/internals.py b/pandas/core/internals.py index efc6d38bf9de2..f74c38ac5f450 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1013,7 +1013,8 @@ def form_blocks(data, axes): blocks.append(int_block) if len(datetime_dict): - datetime_block = _simple_blockify(datetime_dict, items, np.dtype('M8[us]')) + datetime_block = _simple_blockify(datetime_dict, items, + np.dtype('M8[us]')) blocks.append(datetime_block) if len(bool_dict): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2cee089d788a4..83badec6d757b 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1138,7 +1138,7 @@ def _generate_regular_range(start, end, periods, offset): raise ValueError('Must specify two of start, end, or periods') if isinstance(offset, Tick): - stride = offset.us_stride() + stride = offset.micros if periods is None: b = Timestamp(start).value e = Timestamp(end).value diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index c1d915a04453c..98716ed1f57d4 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -962,7 +962,8 @@ def delta(self): return self._delta - def us_stride(self): + @property + def micros(self): return _delta_to_microseconds(self.delta) def apply(self, other): diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 4cd548dc9120a..2497bf752fa22 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -233,12 +233,22 @@ def _make_time_bins(axis, freq, begin=None, end=None, return binner, bins, labels def _get_range_edges(axis, begin, end, offset, closed='left'): + from pandas.tseries.offsets import Tick, _delta_to_microseconds if isinstance(offset, basestring): offset = to_offset(offset) if not isinstance(offset, DateOffset): raise ValueError("Rule not a recognized offset") + if isinstance(offset, Tick): + day_micros = _delta_to_microseconds(timedelta(1)) + # #1165 + if ((day_micros % offset.micros) == 0 and begin is None + and end is None): + return _adjust_dates_anchored(axis[0], axis[-1], offset, + closed=closed) + + if begin is None: if closed == 'left': first = Timestamp(offset.rollback(axis[0])) @@ -255,6 +265,45 @@ def _get_range_edges(axis, begin, end, offset, closed='left'): return first, last + +def _adjust_dates_anchored(first, last, offset, closed='right'): + from pandas.tseries.tools import normalize_date + + start_day_micros = Timestamp(normalize_date(first)).value + last_day_micros = Timestamp(normalize_date(last)).value + + foffset = (first.value - start_day_micros) % offset.micros + loffset = 
(last.value - last_day_micros) % offset.micros + + if closed == 'right': + if foffset > 0: + # roll back + fresult = first.value - foffset + else: + fresult = first.value - offset.micros + + if loffset > 0: + # roll forward + lresult = last.value + (offset.micros - loffset) + else: + # already the end of the road + lresult = last.value + else: # closed == 'left' + if foffset > 0: + fresult = first.value - foffset + else: + # start of the road + fresult = first.value + + if loffset > 0: + # roll forward + lresult = last.value + (offset.micros - loffset) + else: + lresult = last.value + offset.micros + + return Timestamp(fresult), Timestamp(lresult) + + def asfreq(obj, freq, method=None, how=None): """ Utility frequency conversion method for Series/DataFrame diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 3b35921ede5ba..d508a73f9c7bc 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -347,6 +347,31 @@ def test_resample_panel_numpy(self): expected = panel.resample('M', how='mean', axis=1) tm.assert_panel_equal(result, expected) + def test_resample_anchored_ticks(self): + # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should + # "anchor" the origin at midnight so we get regular intervals rather + # than starting from the first timestamp which might start in the middle + # of a desired interval + + rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s') + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + freqs = ['t', '5t', '15t', '30t', '4h', '12h'] + for freq in freqs: + result = ts[2:].resample(freq, closed='left', label='left') + expected = ts.resample(freq, closed='left', label='left') + assert_series_equal(result, expected) + + def test_resample_daily_anchored(self): + rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T') + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + result = ts[2:].resample('D', closed='left', label='left') + expected = ts.resample('D', closed='left', label='left') + assert_series_equal(result, expected) + def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 38d812915d0f7..02a98858ed808 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -6,17 +6,17 @@ from pandas import Series, date_range import pandas.util.testing as tm -from pandas.tseries.util import convert_to_annual, isleapyear +from pandas.tseries.util import pivot_annual, isleapyear -class TestConvertAnnual(unittest.TestCase): +class TestPivotAnnual(unittest.TestCase): """ - New pandas of scikits.timeseries convert_to_annual + New pandas of scikits.timeseries pivot_annual """ def test_daily(self): rng = date_range('1/1/2000', '12/31/2004', freq='D') ts = Series(np.random.randn(len(rng)), index=rng) - annual = convert_to_annual(ts, 'D') + annual = pivot_annual(ts, 'D') doy = ts.index.dayofyear doy[(-isleapyear(ts.index.year)) & (doy >= 60)] += 1 @@ -40,7 +40,7 @@ def test_monthly(self): rng = date_range('1/1/2000', '12/31/2004', freq='M') ts = Series(np.random.randn(len(rng)), index=rng) - annual = convert_to_annual(ts, 'M') + annual = pivot_annual(ts, 'M') month = ts.index.month @@ -49,13 +49,13 @@ def test_monthly(self): subset.index = [x.year for x in subset.index] tm.assert_series_equal(annual[i].dropna(), subset) - def 
test_interval_monthly(self): + def test_period_monthly(self): pass - def test_interval_daily(self): + def test_period_daily(self): pass - def test_interval_weekly(self): + def test_period_weekly(self): pass if __name__ == '__main__': diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index c3b4b8272d5b9..2163deaf3c102 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -3,7 +3,7 @@ from pandas.core.frame import DataFrame import pandas.core.nanops as nanops -def convert_to_annual(series, freq=None): +def pivot_annual(series, freq=None): """ Group a series by years, taking leap years into account. From e043862528b066f2d0e2c041ce1deeac2e181915 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 17:40:46 -0400 Subject: [PATCH 025/114] BUG: support resampling of period data to, e.g. 5minute, though with timestamped result, close #1231 --- pandas/tseries/resample.py | 9 +++++++-- pandas/tseries/tests/test_resample.py | 9 +++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 2497bf752fa22..081375f8245ee 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -57,6 +57,13 @@ def resample(self, obj): if isinstance(axis, DatetimeIndex): return self._resample_timestamps(obj) elif isinstance(axis, PeriodIndex): + offset = to_offset(self.freq) + if offset.n > 1: + if self.kind == 'period': # pragma: no cover + print 'Warning: multiple of frequency -> timestamps' + # Cannot have multiple of periods, convert to timestamp + self.kind = 'timestamp' + if self.kind is None or self.kind == 'period': return self._resample_periods(obj) else: @@ -248,7 +255,6 @@ def _get_range_edges(axis, begin, end, offset, closed='left'): return _adjust_dates_anchored(axis[0], axis[-1], offset, closed=closed) - if begin is None: if closed == 'left': first = Timestamp(offset.rollback(axis[0])) @@ -259,7 +265,6 @@ def _get_range_edges(axis, begin, end, offset, closed='left'): if end is None: last = Timestamp(axis[-1] + offset) - # last = Timestamp(offset.rollforward(axis[-1])) else: last = Timestamp(offset.rollforward(end)) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index d508a73f9c7bc..5b3613e57620d 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -527,6 +527,15 @@ def test_cant_fill_missing_dups(self): s = TimeSeries(np.random.randn(5), index=rng) self.assertRaises(Exception, s.resample, 'A') + def test_resample_5minute(self): + rng = period_range('1/1/2000', '1/5/2000', freq='T') + ts = TimeSeries(np.random.randn(len(rng)), index=rng) + + result = ts.resample('5min') + expected = ts.to_timestamp().resample('5min') + assert_series_equal(result, expected) + + class TestTimeGrouper(unittest.TestCase): def setUp(self): From 996b9647a9c9c372e897f1a09f6e922f2f746bac Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 18:00:02 -0400 Subject: [PATCH 026/114] BUG: remove restriction in lib.Reducer that the index be of object dtype. 
close #1214 --- pandas/src/reduce.pyx | 16 ++++++++-------- pandas/tests/test_tseries.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index 3aa6388a144e1..af102e1f4e777 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -9,6 +9,7 @@ cdef class Reducer: cdef: Py_ssize_t increment, chunksize, nresults object arr, dummy, f, labels + bint can_set_name def __init__(self, object arr, object f, axis=1, dummy=None, labels=None): @@ -37,12 +38,14 @@ cdef class Reducer: def _check_dummy(self, dummy=None): if dummy is None: dummy = np.empty(self.chunksize, dtype=self.arr.dtype) + self.can_set_name = 0 else: if dummy.dtype != self.arr.dtype: raise ValueError('Dummy array must be same dtype') if len(dummy) != self.chunksize: raise ValueError('Dummy array must be length %d' % self.chunksize) + self.can_set_name = type(dummy) != np.ndarray return dummy @@ -54,7 +57,7 @@ cdef class Reducer: flatiter it object res bint set_label = 0 - ndarray[object] labels + ndarray labels arr = self.arr chunk = self.dummy @@ -62,18 +65,14 @@ cdef class Reducer: dummy_buf = chunk.data chunk.data = arr.data - set_label = self.labels is not None - + set_label = self.labels is not None and self.can_set_name if set_label: - if not np.issubdtype(self.labels.dtype, object): - labels = self.labels.astype('O') - else: - labels = self.labels + labels = self.labels try: for i in range(self.nresults): if set_label: - chunk.name = labels[i] + chunk.name = util.get_value_at(labels, i) res = self.f(chunk) if i == 0: @@ -86,6 +85,7 @@ cdef class Reducer: except Exception, e: if hasattr(e, 'args'): e.args = e.args + (i,) + print e finally: # so we don't free the wrong memory chunk.data = dummy_buf diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 12b515cb372da..d9ddf63fea29c 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -492,9 +492,37 @@ def test_to_object_array_tuples(self): except ImportError: pass + class TestMoments(unittest.TestCase): pass + +class TestReducer(unittest.TestCase): + + def test_int_index(self): + from pandas.core.series import Series + + arr = np.random.randn(100, 4) + + result = lib.reduce(arr, np.sum, labels=np.arange(4)) + expected = arr.sum(0) + assert_almost_equal(result, expected) + + result = lib.reduce(arr, np.sum, axis=1, labels=np.arange(100)) + expected = arr.sum(1) + assert_almost_equal(result, expected) + + dummy = Series(0., index=np.arange(100)) + result = lib.reduce(arr, np.sum, dummy=dummy, labels=np.arange(4)) + expected = arr.sum(0) + assert_almost_equal(result, expected) + + dummy = Series(0., index=np.arange(4)) + result = lib.reduce(arr, np.sum, axis=1, + dummy=dummy, labels=np.arange(100)) + expected = arr.sum(1) + assert_almost_equal(result, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], From 0cf9e3d166ba092f9e439340f505bdf750316eb0 Mon Sep 17 00:00:00 2001 From: Kelsey Jordahl Date: Tue, 8 May 2012 17:22:02 -0400 Subject: [PATCH 027/114] ENH: Allow different number of rows & columns in a histogram plot --- pandas/tools/plotting.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 8168e1367f962..bc43e5454c9b3 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -832,13 +832,16 @@ def hist_frame(data, grid=True, xlabelsize=None, xrot=None, 
""" import matplotlib.pyplot as plt n = len(data.columns) - k = 1 - while k ** 2 < n: - k += 1 - _, axes = _subplots(nrows=k, ncols=k, ax=ax, squeeze=False) + rows, cols = 1, 1 + while rows * cols < n: + if cols > rows: + rows += 1 + else: + cols += 1 + _, axes = _subplots(nrows=rows, ncols=cols, ax=ax, squeeze=False) for i, col in enumerate(com._try_sort(data.columns)): - ax = axes[i / k][i % k] + ax = axes[i / cols][i % cols] ax.xaxis.set_visible(True) ax.yaxis.set_visible(True) ax.hist(data[col].dropna().values, **kwds) @@ -854,8 +857,8 @@ def hist_frame(data, grid=True, xlabelsize=None, xrot=None, if yrot is not None: plt.setp(ax.get_yticklabels(), rotation=yrot) - for j in range(i + 1, k**2): - ax = axes[j / k, j % k] + for j in range(i + 1, rows * cols): + ax = axes[j / cols, j % cols] ax.set_visible(False) ax.get_figure().subplots_adjust(wspace=0.3, hspace=0.3) From 7baa84cbab789ff48c6cd0cb97569795551de001 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 18:13:29 -0400 Subject: [PATCH 028/114] TST: vbenchmark for #561, push more work til 0.9 --- vb_suite/index_object.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py index 3df763133da87..819a81a53db52 100644 --- a/vb_suite/index_object.py +++ b/vb_suite/index_object.py @@ -16,7 +16,20 @@ rng2 = rng[:-1] """ -index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup, - name='index_datetime_intersection') -index_datetime_union = Benchmark("rng.union(rng2)", setup, - name='index_datetime_union') +index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup) +index_datetime_union = Benchmark("rng.union(rng2)", setup) + +# integers +setup = common_setup + """ +N = 1000000 +options = np.arange(N) + +left = Index(options.take(np.random.permutation(N)[:N // 2])) +right = Index(options.take(np.random.permutation(N)[:N // 2])) +""" + +index_int64_union = Benchmark('left.union(right)', setup, + start_date=datetime(2011, 1, 1)) + +index_int64_intersection = Benchmark('left.intersection(right)', setup, + start_date=datetime(2011, 1, 1)) From 8b972a1c65e33ad33fe04ea96a886b30cd5c7da7 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 18:16:40 -0400 Subject: [PATCH 029/114] BUG: don't print exception in reducer --- pandas/core/groupby.py | 5 ++--- pandas/src/reduce.pyx | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 58c75479e2004..c46a2395791b3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -537,6 +537,7 @@ def indices(self): return self.groupings[0].indices else: # TODO: this is massively inefficient + foo to_groupby = zip(*(ping.grouper for ping in self.groupings)) to_groupby = Index(to_groupby) return lib.groupby_indices(to_groupby) @@ -2039,9 +2040,7 @@ def _intercept_cython(func): return _cython_table.get(func) def _groupby_indices(values): - if values.dtype != np.object_: - values = values.astype('O') - return lib.groupby_indices(values) + return lib.groupby_indices(com._ensure_object(values)) def numpy_groupby(data, labels, axis=0): s = np.argsort(labels) diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index af102e1f4e777..2a956c53f2488 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -85,7 +85,6 @@ cdef class Reducer: except Exception, e: if hasattr(e, 'args'): e.args = e.args + (i,) - print e finally: # so we don't free the wrong memory chunk.data = dummy_buf From 
93b522181adc209e0b293a0767f0249eef650d26 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 12 May 2012 18:16:59 -0400 Subject: [PATCH 030/114] BUG: rogue foo --- pandas/core/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c46a2395791b3..7bd66e43d6542 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -537,7 +537,6 @@ def indices(self): return self.groupings[0].indices else: # TODO: this is massively inefficient - foo to_groupby = zip(*(ping.grouper for ping in self.groupings)) to_groupby = Index(to_groupby) return lib.groupby_indices(to_groupby) From eb460c0f960d5646048257a33e8f42373265532b Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 13 May 2012 00:32:08 -0400 Subject: [PATCH 031/114] ENH: reimplement groupby_indices using better algorithmic tricks, associated vbenchmark. close #609 --- pandas/core/algorithms.py | 3 +- pandas/core/groupby.py | 21 +++++- pandas/src/groupby.pyx | 113 +++++++++++++--------------- pandas/src/sandbox.pyx | 151 +++++++++----------------------------- vb_suite/groupby.py | 20 +++++ 5 files changed, 123 insertions(+), 185 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9315d63c5865..44673249dfd4c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -94,6 +94,7 @@ def _unique_generic(values, table_type, type_caster): uniques = table.unique(values) return uniques + def factorize(values, sort=False, order=None, na_sentinel=-1): """ Encode input values as an enumerated type or categorical variable @@ -118,7 +119,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1): uniques = com._asarray_tuplesafe(uniques) if sort and len(counts) > 0: sorter = uniques.argsort() - reverse_indexer = np.empty(len(sorter), dtype=np.int32) + reverse_indexer = np.empty(len(sorter), dtype=np.int_) reverse_indexer.put(sorter, np.arange(len(sorter))) mask = labels < 0 diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7bd66e43d6542..3d8f70892aa78 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -536,10 +536,9 @@ def indices(self): if len(self.groupings) == 1: return self.groupings[0].indices else: - # TODO: this is massively inefficient - to_groupby = zip(*(ping.grouper for ping in self.groupings)) - to_groupby = Index(to_groupby) - return lib.groupby_indices(to_groupby) + label_list = [ping.labels for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] + return _get_indices_dict(label_list, keys) @property def labels(self): @@ -1972,6 +1971,20 @@ def get_key(self, comp_id): return tuple(level[table.get_item(comp_id)] for table, level in zip(self.tables, self.levels)) + +def _get_indices_dict(label_list, keys): + shape = [len(x) for x in keys] + group_index = get_group_index(label_list, shape) + + sorter, _ = lib.groupsort_indexer(com._ensure_int64(group_index), + np.prod(shape)) + + sorted_labels = [lab.take(sorter) for lab in label_list] + group_index = group_index.take(sorter) + index = np.arange(len(group_index)).take(sorter) + + return lib.indices_fast(index, group_index, keys, sorted_labels) + #---------------------------------------------------------------------- # sorting levels...cleverly? 
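The "better algorithmic trick" in _get_indices_dict above is sort-then-slice: encode the grouping codes into a single integer id, argsort it once (stably), and slice out contiguous runs of equal ids instead of hashing tuple keys row by row. The patch does this in Cython via lib.groupsort_indexer and lib.indices_fast; the pure-NumPy sketch below only illustrates the idea, and the example label arrays and shape are made up, not taken from the patch.

import numpy as np

# hypothetical integer codes for two grouping keys over five rows
labels_a = np.array([0, 1, 0, 1, 0])
labels_b = np.array([0, 0, 1, 1, 0])
shape = (2, 2)  # number of distinct values per key

group_index = labels_a * shape[1] + labels_b        # combined integer id per row
sorter = np.argsort(group_index, kind='mergesort')  # one stable O(N log N) sort
positions = np.arange(len(group_index))[sorter]     # original row locations, reordered by id
sorted_ids = group_index[sorter]

indices = {}
start = 0
for i in range(1, len(sorted_ids) + 1):
    # a run ends when the id changes (or at the end of the array)
    if i == len(sorted_ids) or sorted_ids[i] != sorted_ids[i - 1]:
        key = divmod(int(sorted_ids[start]), shape[1])  # decode back to (a, b) codes
        indices[key] = positions[start:i]
        start = i

# indices -> {(0, 0): array([0, 4]), (0, 1): array([2]), (1, 0): array([1]), (1, 1): array([3])}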
diff --git a/pandas/src/groupby.pyx b/pandas/src/groupby.pyx index 48a71f4d1d51f..a05e619636dd4 100644 --- a/pandas/src/groupby.pyx +++ b/pandas/src/groupby.pyx @@ -746,7 +746,6 @@ def group_var(ndarray[float64_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) - def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, object closed='left'): """ @@ -1107,8 +1106,8 @@ def group_ohlc(ndarray[float64_t, ndim=2] out, out[b, 3] = vclose -# @cython.boundscheck(False) -# @cython.wraparound(False) +@cython.boundscheck(False) +@cython.wraparound(False) def group_mean_bin(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, @@ -1268,62 +1267,6 @@ def lookup_values(ndarray[object] values, dict mapping): result[i] = mapping[values[i]] return maybe_convert_objects(result) -def reduce_mean(ndarray[object] indices, - ndarray[object] buckets, - ndarray[float64_t] values, - inclusive=False): - cdef: - Py_ssize_t i, j, nbuckets, nvalues - ndarray[float64_t] output - float64_t the_sum, val, nobs - - - - nbuckets = len(buckets) - nvalues = len(indices) - - assert(len(values) == len(indices)) - - output = np.empty(nbuckets, dtype=float) - output.fill(np.NaN) - - j = 0 - for i from 0 <= i < nbuckets: - next_bound = buckets[i] - the_sum = 0 - nobs = 0 - if inclusive: - while j < nvalues and indices[j] <= next_bound: - val = values[j] - # not NaN - if val == val: - the_sum += val - nobs += 1 - j += 1 - else: - while j < nvalues and indices[j] < next_bound: - val = values[j] - # not NaN - if val == val: - the_sum += val - nobs += 1 - j += 1 - - if nobs > 0: - output[i] = the_sum / nobs - - if j >= nvalues: - break - - return output - -def _bucket_locs(index, buckets, inclusive=False): - if inclusive: - locs = index.searchsorted(buckets, side='left') - else: - locs = index.searchsorted(buckets, side='right') - - return locs def count_level_1d(ndarray[uint8_t, cast=True] mask, ndarray[int64_t] labels, Py_ssize_t max_bin): @@ -1341,6 +1284,7 @@ def count_level_1d(ndarray[uint8_t, cast=True] mask, return counts + def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, ndarray[int64_t] labels, Py_ssize_t max_bin): cdef: @@ -1357,6 +1301,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts + def duplicated(list values, take_last=False): cdef: Py_ssize_t i, n @@ -1411,7 +1356,7 @@ def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): return starts, ends -def groupby_arrays(ndarray index, ndarray[int64_t] labels): +def groupby_arrays(ndarray index, ndarray[int64_t] labels, sort=True): cdef: Py_ssize_t i, lab, cur, start, n = len(index) dict result = {} @@ -1419,10 +1364,11 @@ def groupby_arrays(ndarray index, ndarray[int64_t] labels): index = np.asarray(index) # this is N log N. 
If this is a bottleneck may we worth fixing someday - indexer = labels.argsort(kind='mergesort') + if sort: + indexer = labels.argsort(kind='mergesort') - labels = labels.take(indexer) - index = index.take(indexer) + labels = labels.take(indexer) + index = index.take(indexer) if n == 0: return result @@ -1438,4 +1384,45 @@ def groupby_arrays(ndarray index, ndarray[int64_t] labels): start = i cur = lab + result[cur] = index[start:] + return result + +def indices_fast(object index, ndarray[int64_t] labels, list keys, + list sorted_labels): + cdef: + Py_ssize_t i, j, k, lab, cur, start, n = len(labels) + dict result = {} + object tup + + k = len(keys) + + if n == 0: + return result + + start = 0 + cur = labels[0] + for i in range(1, n): + lab = labels[i] + + if lab != cur: + if lab != -1: + tup = PyTuple_New(k) + for j in range(k): + val = util.get_value_at(keys[j], + sorted_labels[j][i-1]) + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + + result[tup] = index[start:i] + start = i + cur = lab + + tup = PyTuple_New(k) + for j in range(k): + val = util.get_value_at(keys[j], + sorted_labels[j][n - 1]) + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + result[tup] = index[start:] + return result diff --git a/pandas/src/sandbox.pyx b/pandas/src/sandbox.pyx index c161ca6ad3c98..dabeb7cf3371c 100644 --- a/pandas/src/sandbox.pyx +++ b/pandas/src/sandbox.pyx @@ -421,117 +421,6 @@ def int64_unique(ndarray[int64_t] arr): return np.sort(uniques[:j]) -def group_add_bin(ndarray[float64_t, ndim=2] out, - ndarray[int32_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int32_t] bins): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, ngroups, b - float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - ngroups = len(bins) + 1 - N, K = ( values).shape - - b = 0 - if K > 1: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - sumx[b, j] += val - else: - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[b, 0] += 1 - sumx[b, 0] += val - print i, b, counts, nobs.squeeze() - - for i in range(ngroups): - print 'writing %d' % i - for j in range(K): - if nobs[i] == 0: - out[i, j] = nan - else: - out[i, j] = sumx[i, j] - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_add(ndarray[float64_t, ndim=2] out, - ndarray[int32_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int32_t] labels): - ''' - Only aggregates on axis=0 - ''' - cdef: - Py_ssize_t i, j, N, K, lab - float64_t val, count - ndarray[float64_t, ndim=2] sumx, nobs - - nobs = np.zeros_like(out) - sumx = np.zeros_like(out) - - N, K = ( values).shape - - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - sumx[lab, j] += val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - val = values[i, 0] - - # not nan - if val == val: - nobs[lab, 0] += 1 - sumx[lab, 0] += val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = sumx[i, j] - - -from datetime cimport getAbsTime - # cdef extern from "kvec.h": @@ -546,12 +435,6 @@ def test_foo(ndarray[int64_t] values): val = values[0] print 
val -def get_abs_time(freq, dailyDate, originalDate): - return getAbsTime(freq, dailyDate, originalDate) - -have_pytz = 1 -import pytz - # cdef extern from "foo.h": # double add_things(double *a, double *b, double *c, int n) @@ -581,3 +464,37 @@ def inner(ndarray[float64_t] x, ndarray[float64_t] y): for i in range(n): result += x[i] * y[i] return result + +def indices_fast(ndarray[int64_t] labels, list keys, + list sorted_labels): + cdef: + Py_ssize_t i, j, k, lab, cur, start, n = len(labels) + dict result = {} + object tup + + index = np.arange(n) + + k = len(keys) + + if n == 0: + return result + + start = 0 + cur = labels[0] + for i in range(1, n): + lab = labels[i] + + if lab != cur: + if lab != -1: + tup = PyTuple_New(k) + for j in range(k): + val = util.get_value_at(keys[j], + sorted_labels[j][cur]) + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + + result[tup] = index[start:i] + start = i + cur = lab + + return result diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index dfffdf61d3df7..f690135e02e62 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -172,3 +172,23 @@ def f(): groupby_last = Benchmark('data.groupby(labels).last()', setup, start_date=datetime(2012, 5, 1)) + + +#---------------------------------------------------------------------- +# groupby_indices replacement, chop up Series + +setup = common_setup + """ +try: + rng = date_range('1/1/2000', '12/31/2005', freq='H') + year, month, day = rng.year, rng.month, rng.day +except: + rng = date_range('1/1/2000', '12/31/2000', offset=datetools.Hour()) + year = rng.map(lambda x: x.year) + month = rng.map(lambda x: x.month) + day = rng.map(lambda x: x.day) + +ts = Series(np.random.randn(len(rng)), index=rng) +""" + +groupby_indices = Benchmark('len(ts.groupby([year, month, day]))', + setup, start_date=datetime(2012, 1, 1)) From 197a7f6270b10135d2391263f71d173b1cb6f081 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 13 May 2012 17:15:15 -0400 Subject: [PATCH 032/114] BLD: fix npy_* -> pandas_*, compiler warnings --- pandas/src/np_datetime.c | 3 ++- pandas/src/np_datetime.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/src/np_datetime.c b/pandas/src/np_datetime.c index 6b238b87f0a9b..06b7b8abd8661 100644 --- a/pandas/src/np_datetime.c +++ b/pandas/src/np_datetime.c @@ -494,7 +494,8 @@ npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_da return result; } -void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, npy_datetimestruct *result) +void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *result) { pandas_datetime_metadata meta; diff --git a/pandas/src/np_datetime.h b/pandas/src/np_datetime.h index ca96201d3b1a6..042ea11d015e9 100644 --- a/pandas/src/np_datetime.h +++ b/pandas/src/np_datetime.h @@ -48,7 +48,8 @@ int convert_pydatetime_to_datetimestruct(PyObject *obj, pandas_datetimestruct *o npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d); -void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, npy_datetimestruct *result); +void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *result); int dayofweek(int y, int m, int d); @@ -107,8 +108,8 @@ can_cast_datetime64_units(PANDAS_DATETIMEUNIT src_unit, int convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, - npy_datetime dt, - pandas_datetimestruct *out); + npy_datetime dt, + 
pandas_datetimestruct *out); From aca4c431e5374261f84ed322f88fe8c2454171b2 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 13 May 2012 23:31:19 -0400 Subject: [PATCH 033/114] TST: remove one skip test --- pandas/core/indexing.py | 2 +- pandas/sparse/tests/test_sparse.py | 1 - pandas/tests/test_multilevel.py | 12 ------------ 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5aa46e41d4c71..c2fb8d820bf8e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -171,7 +171,7 @@ def _getitem_lowerdim(self, tup): except Exception: if isinstance(tup[0], slice): raise IndexingError - if tup[0] not in ax0: + if tup[0] not in ax0: # and tup[0] not in ax0.levels[0]: raise # to avoid wasted computation diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 6bb6dd129c771..48d8bc0f77ca6 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -432,7 +432,6 @@ def test_operators_corner2(self): result = val - self.zbseries assert_sp_series_equal(result, 3 - self.zbseries) - def test_binary_operators(self): def _check_inplace_op(op): tmp = self.bseries.copy() diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index ed4184c69746f..c9c6f5e290b99 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1100,18 +1100,6 @@ def test_partial_ix_missing(self): self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) - def test_fancy_2d(self): - raise nose.SkipTest - - result = self.frame.ix['foo', 'B'] - expected = self.frame.xs('foo')['B'] - assert_series_equal(result, expected) - - ft = self.frame.T - result = ft.ix['B', 'foo'] - expected = ft.xs('B')['foo'] - assert_series_equal(result, expected) - #---------------------------------------------------------------------- def test_to_html(self): From c1260e340e7b880705dce58a8db7c3d8490fe344 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 13 May 2012 23:55:24 -0400 Subject: [PATCH 034/114] ENH: store pytz time zones as zone strings in HDFStore, close #1232 --- pandas/io/pytables.py | 23 ++++++++++++++++++++--- pandas/io/tests/test_pytables.py | 16 +++++++++++++++- pandas/tseries/index.py | 6 +++--- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 65baa69d7c50c..1c9aac8d4cded 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -585,6 +585,9 @@ def _write_index(self, group, key, index): if hasattr(index, 'freq'): node._v_attrs.freq = index.freq + if hasattr(index, 'tz') and index.tz is not None: + node._v_attrs.tz = index.tz.zone + def _read_index(self, group, key): variety = getattr(group._v_attrs, '%s_variety' % key) @@ -668,15 +671,21 @@ def _read_index_node(self, node): name = node._v_attrs.name index_class = getattr(node._v_attrs, 'index_class', Index) + + factory = _get_index_factory(index_class) + kwargs = {} if 'freq' in node._v_attrs: kwargs['freq'] = node._v_attrs['freq'] + if 'tz' in node._v_attrs: + kwargs['tz'] = node._v_attrs['tz'] + if kind in ('date', 'datetime'): - index = index_class(_unconvert_index(data, kind), dtype=object, - **kwargs) + index = factory(_unconvert_index(data, kind), dtype=object, + **kwargs) else: - index = index_class(_unconvert_index(data, kind), **kwargs) + index = factory(_unconvert_index(data, kind), **kwargs) index.name = name @@ -1085,3 +1094,11 @@ 
def select_coords(self): """ self.values = self.table.getWhereList(self.the_condition) +def _get_index_factory(klass): + if klass == DatetimeIndex: + def f(values, freq=None, tz=None): + return DatetimeIndex._simple_new(values, None, freq=freq, + tz=tz) + return f + return klass + diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 44dff4c4810b5..6cb97e3bcf082 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -8,7 +8,8 @@ from datetime import datetime import numpy as np -from pandas import Series, DataFrame, Panel, MultiIndex, bdate_range +from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, + date_range) from pandas.io.pytables import HDFStore, get_store import pandas.util.testing as tm from pandas.tests.test_series import assert_series_equal @@ -338,6 +339,19 @@ def test_can_serialize_dates(self): frame = DataFrame(np.random.randn(len(rng), 4), index=rng) self._check_roundtrip(frame, tm.assert_frame_equal) + def test_timezones(self): + rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + try: + store = HDFStore(self.scratchpath) + store['frame'] = frame + recons = store['frame'] + self.assert_(recons.index.equals(rng)) + self.assertEquals(rng.tz, recons.index.tz) + finally: + store.close() + os.remove(self.scratchpath) + def test_store_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 83badec6d757b..36814876f4e17 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -322,11 +322,11 @@ def _generate(cls, start, end, periods, name, offset, return index @classmethod - def _simple_new(cls, values, name, offset, tz): + def _simple_new(cls, values, name, freq=None, tz=None): result = values.view(cls) result.name = name - result.offset = offset - result.tz = tz + result.offset = freq + result.tz = tools._maybe_get_tz(tz) return result From 4c32ab8ca642238ab13e69837cd860636c07b764 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 14 May 2012 09:12:28 -0400 Subject: [PATCH 035/114] Stop storing class reference in HDFStore #1235 --- pandas/io/pytables.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1c9aac8d4cded..951070c923cc1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -580,7 +580,7 @@ def _write_index(self, group, key, index): node._v_attrs.name = index.name if isinstance(index, (DatetimeIndex, PeriodIndex, IntIndex)): - node._v_attrs.index_class = type(index) + node._v_attrs.index_class = _class_to_alias(type(index)) if hasattr(index, 'freq'): node._v_attrs.freq = index.freq @@ -670,9 +670,7 @@ def _read_index_node(self, node): if 'name' in node._v_attrs: name = node._v_attrs.name - index_class = getattr(node._v_attrs, 'index_class', Index) - - factory = _get_index_factory(index_class) + index_class = _alias_to_class(getattr(node._v_attrs, 'index_class', '')) kwargs = {} if 'freq' in node._v_attrs: @@ -1012,6 +1010,22 @@ def _is_table_type(group): # new node, e.g. 
return False +_index_type_map = {DatetimeIndex : 'datetime', + PeriodIndex : 'period', + IntIndex : 'sparse integer'} + +_reverse_index_map = {} +for k, v in _index_type_map.iteritems(): + _reverse_index_map[v] = k + +def _class_to_alias(cls): + return _index_type_map.get(cls, '') + +def _alias_to_class(alias): + if isinstance(alias, type): + return alias + return _reverse_index_map.get(alias, Index) + class Selection(object): """ Carries out a selection operation on a tables.Table object. From e057ad53e215bafc67bc4ec945b9aa6b64ee72f6 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 14 May 2012 09:26:37 -0400 Subject: [PATCH 036/114] removed extraneous IntIndex instance test --- pandas/io/pytables.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 951070c923cc1..b1ac81cbc2aa1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -579,7 +579,7 @@ def _write_index(self, group, key, index): node._v_attrs.kind = kind node._v_attrs.name = index.name - if isinstance(index, (DatetimeIndex, PeriodIndex, IntIndex)): + if isinstance(index, (DatetimeIndex, PeriodIndex)): node._v_attrs.index_class = _class_to_alias(type(index)) if hasattr(index, 'freq'): @@ -1011,8 +1011,7 @@ def _is_table_type(group): return False _index_type_map = {DatetimeIndex : 'datetime', - PeriodIndex : 'period', - IntIndex : 'sparse integer'} + PeriodIndex : 'period'} _reverse_index_map = {} for k, v in _index_type_map.iteritems(): From 0cdfe754a7b817f3a3d6b0305f7d920569bc1c1e Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 09:58:32 -0400 Subject: [PATCH 037/114] BUG: fix rebase conflict from #1236 --- pandas/io/pytables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b1ac81cbc2aa1..dec9616cfba8c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -671,6 +671,7 @@ def _read_index_node(self, node): name = node._v_attrs.name index_class = _alias_to_class(getattr(node._v_attrs, 'index_class', '')) + factory = _get_index_factory(index_class) kwargs = {} if 'freq' in node._v_attrs: From 8d27185ff4f0051580d67c048900b768d858476f Mon Sep 17 00:00:00 2001 From: RuiDC Date: Fri, 11 May 2012 10:39:46 +0200 Subject: [PATCH 038/114] treat XLRD.XL_CELL_ERROR as NaN --- pandas/io/parsers.py | 4 +++- pandas/io/tests/test3.xls | Bin 0 -> 23040 bytes pandas/io/tests/test_parsers.py | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 pandas/io/tests/test3.xls diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index e218fdce98380..5912b3c9732cf 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1001,7 +1001,7 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None): from datetime import MINYEAR, time, datetime - from xlrd import xldate_as_tuple, XL_CELL_DATE + from xlrd import xldate_as_tuple, XL_CELL_DATE, XL_CELL_ERROR datemode = self.book.datemode sheet = self.book.sheet_by_name(sheetname) @@ -1017,6 +1017,8 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None, value = time(*dt[3:]) else: value = datetime(*dt) + if typ == XL_CELL_ERROR: + value = np.nan row.append(value) data.append(row) diff --git a/pandas/io/tests/test3.xls b/pandas/io/tests/test3.xls new file mode 100644 index 0000000000000000000000000000000000000000..f73943d6779517e5448aec76fe1b7cfd526f37ae GIT binary patch literal 23040 
zcmeHPYiu0V6+ScGwa0NDcH%fs?6ni;;n*SBbwWZOo5VbY^C$!*70_g3uM;cAj+`}3 zB%&Bv{uD~Vra>icDW#|nDDM(bX%i#_t>m;7C8DGeg(^ir44tNpQ9^@{x^1gI3qcM) zNoymsDGDCca7q8-hB?i1<}6utZA=F$v4YuMCUAo!>&`g}5bD{`+}|2F+6KJ*{^&>!}pAN8R>;zNJPhyHCJ z`mH{6KY9G{pAa}TP>b+GTMysypuXs`+>9&0>e>NojW!i$%I*h`EoaJshs$2ANIeHAIlk({Af+~_#ej?6= zJhpx;|F4qtmuV+XXG)w2l5WfSU{-HgoRO-knu;3e`5r-s05-m*XG{O3=o7v4Tqesa za!!@>moW&WwYDCnNV-x!3c83h4m;jxRMU;DP&*~*rKfJGJ7l!bAs3ws=rjTGzXWGI z02O@<`i+U`t@Zp0))=*j3f5q-hzcx7Q6fc(t-vZPiUaqJMO9$q7FB_bQd9+Q7DZLy z_EJ;@Zm307;K)=|1#V(RRp2&UR0VF7ffd#(?SZ3ciGO>Dh=!8xVwaP{szZhf!Xc#q z5Oza<5RTggfUs-&gK*F&0EFGuAEdf45O!gI5DsevNWyOI55j%=m>{F=!PdbPq!5r` z>r5{UB-lDL3IhqY4mzwt9w*p3vkC(VwhrcQ4oPs~?n|t;6Hf$N2XnhZK!UAPTNp^N zbB`D70~@IanW$z(E5C?`z{#WV|=NDAeo z<&R?dr9$AiKpiJH7h9pF*b0k_t$^j9>@%>n*?=y^<_}AjpqSNras|vT+!dscz3_LZ zkEvsur8Wqgle3E}`wkO>!$`ak5HlGJk|+d3?QT@NwgM#Q2zif_@6O9mA=k!|$s4?t zXpVl%lnwRMZ)2)amfNcEZ1}Q4n}eDyRK9y$-~t&*`?!bs_J{*SZS5;;^{_7U=9ALL zOmox*k?MK;@yGL8yOSncJJWzIl5FixTE5LzqO8+qNpG8Z#l_@j7J@E>P&v=b(^P)U zTIH&sx~g`_xLWBBR-;11e=fs%SDUJ{`O5AfUy?PpBZJ_c6>)IU@D{KvlI}Y5<~wGT z&sj}i*T`!V%%)0Z6VGSU5WvO?M1oB;-F5EFAH3O2R@o%-*~|-IV}&ZgrZnC4_B*e6 zvzel@Y0YQT6u^dqSypLf>8`VX{Hr&cYL(5hd^U>%*l-BUvN6(Kr~md3Z#Gj^Hp}zb z!~@t^K}{&FJl*x-UtaZQQ^RapgFUSdY+4KOw9)pbN6&h*nWnPw@U%GrY@D7p+WzL# z$Gq80SJ`-Y+WY`EPEQ+c|NYr#z1hrA*?4%`q5w8dPaAC?J#pHb%}kYzho_w%z{crm zqwU9Dd&`^6ER~IirzHZ|I6ZB&{ex3adb63$Y?cRmS`2KK7vO2}#5bRK#bUz&4atsa z=c#NwJZ)|O8>gqm6Q_^d<;|v6W#i##3j)|UJuRMi{*|NNZ0b}t9-h`5z{crm@x)`_ zebJjuOl9NYX-fjwI6W<%7&>{zn@zpS#>3NE1K2n{EuMJ)_&2=RjK`~M%v>&PWu zpaw1AVcCXM=a8||ZFy~$Swb5yaqF~u;3$v2uqeQzBR=PqgKQ+J#Lfh);kTuFItF@D zneM*cA>$@Dpk8BEkhR&Ug9fn8CxxL`H}zo%#v^9#dl{4m#*zWADhLLpgf~RE$rDcv zg`HQ1>#E#!HLOpGfkGk{8|c-&rUOJ!Y2sjMq0m31|&M`+Pti8*A%+@eHFz@!Z-xgMVsSP^mA=D|{w zU7(aL#s0-y*%)1ep5QvWPRn2fb>=B`BBp^l1K4TRFUGzdbkMG`c(4pL7ArN%%<*|O z+`Oh@ZwTIGTzp_(PpUVS>FYlj+nPR*88R+)L+Q)RO1hX1V7ndLzE{}44}u(o&bLCG z1Mml)rbz3vv9s<~j%ULJ{nEZZIO-}lt~S$*(4|mXpW>>#wji;5&Kl| zDi3fg{`r8L$sZi&F+77OoZyIoD)?j%a4Qb_fLlWYU4XYb!4V6UfKW2U1DxZbWwA1+ zT88y;v;OYG$KrmER`x;JjlhtzU zSp>wUzD#<^*yYC1$DzG2Mg~F=o=-+zUMGn%L>z}fZi;0kZ*Hm$ma;@vN;oSl2}(#66;DzVqI~-yl^lI-OY825RJkRTky>N z_Qao6#;&@GU3F`BrU%+u>aJBX3Fg@f@5sP#tgn1;*BD~8fauGx2PkU~B{KvBE8Mbv zL^zdeeS!5ZHQfK;{b!A5C^KH4x(5v~jNW6^oHLRzfMV4}=Pn{em32=khX{1s99tKv8h zrMpCWGszoP<@o{|<*npG$N7D{r4@)8IB8HJYat=h?~?L6Ln9brjO4^X4R>12osURY zLkbi`AI6EF(K3#qW9%T#n^~zKP9mmR#>>$@6uK2#uTUte8=}NoYSLZ-?gr4R;Ye$- zO!pMO%cFIC$fjFisasuA>e$vhiAtmxx)5l$F^zURib2UdS1!3I$gY<3lGB&q$e$mZ z@&ig+%5RM7Kc-Ek4zQ1mtspMf<%qC=j}@#kmh}l1_{3H?i}6%wh0oFo4zvDGpMHeS zu2ghIZZX-}UccqubMI~5Q~BVX1}&QZ#H-vZJ&Me4@%jEbWR9wrA#+sNi98wk05bO= zcOYYp4zpmQDab!W=74e*m3U7GM+WDQif}n5ePee=f8YMTu1xIW10CreD~dI(cxLnY zC$)8`KX~2l#xLL;W9=}78VEHIY9Q1=sDV%ep$0+?gc=An5NaUQK&XLG16d6O&HwdN zCr^zmo>e(~H|GD%?>)@(|L2i;uFvy-p7-<5ceWt&&(pRe^PGMsGSBz-AoHs6K4e}8 zkm~~X;asi*So8FWqCUvJ-?aedvb^?SEtKa@KD$Vcoc^=O@l=!b)c&=ZC%`!}4ER@IO-U zA4>4lybd{rT#wB6=iqxTaw9VT55Ron1;`7L`TmE<{2iuGR|8jKlOpfCjA7$B|I(8; zn-`O6C@S`Vu~h6wF@ujpC&ZC1vA*0#wxgtM1NVOXs9=|nW3MC^{EdHrwfVCQg;w02 z-FFwItHhQ~{xtzVJMX#f5;8F*_wx%s+y&szZHU`Xq5SzjVnpM?SClRSH~zWD_1Hkl Q7AQ`h6(8gI8yx)q1OB=L6aWAK literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 02fc25329e7bc..cc67e895671ef 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -376,6 +376,17 @@ def test_excel_stop_iterator(self): parsed = excel_data.parse('Sheet1') expected = DataFrame([['aaaa','bbbbb']], columns=['Test', 'Test1']) assert_frame_equal(parsed, expected) + + def 
test_excel_cell_error_na(self): + try: + import xlrd + except ImportError: + raise nose.SkipTest('xlrd not installed, skipping') + + excel_data = ExcelFile(os.path.join(self.dirpath, 'test3.xls')) + parsed = excel_data.parse('Sheet1') + expected = DataFrame([[np.nan]], columns=['Test']) + assert_frame_equal(parsed, expected) def test_excel_table(self): try: From 1e6aea57306fbb3a467bda56e94063c819b126b5 Mon Sep 17 00:00:00 2001 From: RuiDC Date: Fri, 11 May 2012 11:12:29 +0200 Subject: [PATCH 039/114] replace tabs with spaces --- pandas/io/parsers.py | 4 ++-- pandas/io/tests/test_parsers.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5912b3c9732cf..a12dca4b5e785 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1017,8 +1017,8 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None, value = time(*dt[3:]) else: value = datetime(*dt) - if typ == XL_CELL_ERROR: - value = np.nan + if typ == XL_CELL_ERROR: + value = np.nan row.append(value) data.append(row) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index cc67e895671ef..92022075d6c5e 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -376,8 +376,8 @@ def test_excel_stop_iterator(self): parsed = excel_data.parse('Sheet1') expected = DataFrame([['aaaa','bbbbb']], columns=['Test', 'Test1']) assert_frame_equal(parsed, expected) - - def test_excel_cell_error_na(self): + + def test_excel_cell_error_na(self): try: import xlrd except ImportError: From 63952a844ff72af76f27d6285245dcb34f871826 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 10:13:52 -0400 Subject: [PATCH 040/114] RLS: release note --- RELEASE.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.rst b/RELEASE.rst index 32c3844810eb8..a82e511c12cb6 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -79,6 +79,7 @@ pandas 0.8.0 cases. Fix pivot table bug (#1181) - Fix formatting of MultiIndex on Series/DataFrame when index name coincides with label (#1217) + - Handle Excel 2003 #N/A as NaN from xlrd (#1213, #1225) pandas 0.7.3 ============ From 349bccb3891afb793d96a4683f076149823f4bf5 Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 11 May 2012 13:16:43 -0400 Subject: [PATCH 041/114] ENH: convert multiple text file columns to a single date column #1186 --- pandas/io/parsers.py | 90 +++++++++++++++++++++++++++++++-- pandas/io/tests/test_parsers.py | 40 +++++++++++++++ 2 files changed, 127 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a12dca4b5e785..a275864de767a 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -24,6 +24,9 @@ def next(x): from pandas.util.decorators import Appender +class DateConversionError(Exception): + pass + _parser_params = """Also supports optionally iterating or breaking of the file into chunks. @@ -51,6 +54,9 @@ def next(x): date_parser : function Function to use for converting dates to strings. 
Defaults to dateutil.parser +date_conversion : list or dict, default None + Can combine multiple columns in date-time specification + Newly created columns are prepended to the output dayfirst : boolean, default False DD/MM format dates, international and European format thousands : str, default None @@ -186,6 +192,7 @@ def read_csv(filepath_or_buffer, parse_dates=False, dayfirst=False, date_parser=None, + date_conversion=None, nrows=None, iterator=False, chunksize=None, @@ -216,6 +223,7 @@ def read_table(filepath_or_buffer, parse_dates=False, dayfirst=False, date_parser=None, + date_conversion=None, nrows=None, iterator=False, chunksize=None, @@ -250,6 +258,7 @@ def read_fwf(filepath_or_buffer, parse_dates=False, dayfirst=False, date_parser=None, + date_conversion=None, nrows=None, iterator=False, chunksize=None, @@ -351,6 +360,7 @@ class TextParser(object): Comment out remainder of line parse_dates : boolean, default False date_parser : function, default None + date_conversion : list or dict, default None skiprows : list of integers Row numbers to skip skip_footer : int @@ -362,8 +372,8 @@ class TextParser(object): def __init__(self, f, delimiter=None, names=None, header=0, index_col=None, na_values=None, thousands=None, comment=None, parse_dates=False, - date_parser=None, dayfirst=False, chunksize=None, - skiprows=None, skip_footer=0, converters=None, + date_parser=None, date_conversion=None, dayfirst=False, + chunksize=None, skiprows=None, skip_footer=0, converters=None, verbose=False, encoding=None): """ Workhorse function for processing nested list into DataFrame @@ -382,6 +392,7 @@ def __init__(self, f, delimiter=None, names=None, header=0, self.parse_dates = parse_dates self.date_parser = date_parser + self.date_conversion = date_conversion self.dayfirst = dayfirst if com.is_integer(skiprows): @@ -745,9 +756,11 @@ def get_chunk(self, rows=None): data[x] = lib.try_parse_dates(data[x], parser=self.date_parser, dayfirst=self.dayfirst) + data, columns = self._process_date_conversion(data, self.columns) + data = _convert_to_ndarrays(data, self.na_values, self.verbose) - return DataFrame(data=data, columns=self.columns, index=index) + return DataFrame(data=data, columns=columns, index=index) def _find_line_number(self, exp_len, chunk_len, chunk_i): if exp_len is None: @@ -778,6 +791,52 @@ def _should_parse_dates(self, i): name = self.index_name[i] return i in to_parse or name in to_parse + def _process_date_conversion(self, data_dict, columns): + if self.date_conversion is None: + return data_dict, columns + + new_cols = [] + new_data = {} + + def date_converter(*date_cols): + if self.date_parser is None: + return lib.try_parse_dates(_concat_date_cols(date_cols), + dayfirst=self.dayfirst) + else: + try: + return self.date_parser(date_cols) + except: + return lib.try_parse_dates(_concat_date_cols(date_cols), + parser=self.date_parser, + dayfirst=self.dayfirst) + + if isinstance(self.date_conversion, list): + # list of column lists + for colspec in self.date_conversion: + new_name, col = _try_convert_dates(date_converter, colspec, + data_dict, columns) + if new_name in data_dict: + raise ValueError('Result date column already in dict %s' % + new_name) + new_data[new_name] = col + new_cols.append(new_name) + + elif isinstance(self.date_conversion, dict): + # dict of new name to column list + for new_name, colspec in self.date_conversion.iteritems(): + if new_name in data_dict: + raise ValueError('Date column %s already in dict' % + new_name) + + _, col = _try_convert_dates(date_converter, 
colspec, data_dict, + columns) + new_data[new_name] = col + new_cols.append(new_name) + + data_dict.update(new_data) + new_cols.extend(columns) + return data_dict, new_cols + def _get_lines(self, rows=None): source = self.data lines = self.buf @@ -860,6 +919,31 @@ def _convert_types(values, na_values): return result, na_count +def _get_col_names(colspec, columns): + colset = set(columns) + colnames = [] + for c in colspec: + if c in colset: + colnames.append(str(c)) + elif isinstance(c, int): + colnames.append(str(columns[c])) + return colnames + +def _try_convert_dates(parser, colspec, data_dict, columns): + colspec = _get_col_names(colspec, columns) + new_name = '_'.join(colspec) + + to_parse = [data_dict[c] for c in colspec if c in data_dict] + try: + new_col = parser(*to_parse) + except DateConversionError: + new_col = _concat_date_cols(to_parse) + return new_name, new_col + +def _concat_date_cols(date_cols): + concat = lambda x: ' '.join(x) + return np.array(np.apply_along_axis(concat, 0, np.vstack(date_cols)), + dtype=object) class FixedWidthReader(object): """ diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 92022075d6c5e..a26c591b576ab 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -12,6 +12,7 @@ import numpy as np from pandas import DataFrame, Index, isnull +import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, ExcelFile, TextParser) from pandas.util.testing import assert_almost_equal, assert_frame_equal, network @@ -90,6 +91,45 @@ def test_comment_fwf(self): comment='#') assert_almost_equal(df.values, expected) + def test_multiple_date_col(self): + # Can use multiple date parsers + data = """\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + def func(*date_cols): + return lib.try_parse_dates(parsers._concat_date_cols(date_cols)) + + df = read_table(StringIO(data), sep=',', header=None, + date_parser=func, + date_conversion={'nominal' : [1, 2], + 'actual' : [1,3]}) + self.assert_('nominal' in df) + self.assert_('actual' in df) + from datetime import datetime + d = datetime(1999, 1, 27, 19, 0) + self.assert_(df.ix[0, 'nominal'] == d) + + data = """\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + df = read_table(StringIO(data), sep=',', header=None, + date_conversion=[[1, 2], [1,3]]) + self.assert_('X.2_X.3' in df) + self.assert_('X.2_X.4' in df) + from datetime import datetime + d = datetime(1999, 1, 27, 19, 0) + self.assert_(df.ix[0, 'X.2_X.3'] == d) + def test_malformed(self): # all data = """ignore From 52492ddc30b812deb78fdb21d2333f8c30411303 Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 11 May 2012 13:39:39 -0400 
Subject: [PATCH 042/114] Merged extra keyword with parse_dates --- pandas/io/parsers.py | 65 +++++++++++++++------------------ pandas/io/tests/test_parsers.py | 6 +-- 2 files changed, 32 insertions(+), 39 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a275864de767a..985da7b29a167 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -54,9 +54,6 @@ class DateConversionError(Exception): date_parser : function Function to use for converting dates to strings. Defaults to dateutil.parser -date_conversion : list or dict, default None - Can combine multiple columns in date-time specification - Newly created columns are prepended to the output dayfirst : boolean, default False DD/MM format dates, international and European format thousands : str, default None @@ -161,7 +158,8 @@ def _read(cls, filepath_or_buffer, kwds): f = com._get_handle(filepath_or_buffer, 'r', encoding=encoding) if kwds.get('date_parser', None) is not None: - kwds['parse_dates'] = True + if isinstance(kwds['parse_dates'], bool): + kwds['parse_dates'] = True # Extract some of the arguments (pass chunksize on). kwds.pop('filepath_or_buffer') @@ -192,7 +190,6 @@ def read_csv(filepath_or_buffer, parse_dates=False, dayfirst=False, date_parser=None, - date_conversion=None, nrows=None, iterator=False, chunksize=None, @@ -223,7 +220,6 @@ def read_table(filepath_or_buffer, parse_dates=False, dayfirst=False, date_parser=None, - date_conversion=None, nrows=None, iterator=False, chunksize=None, @@ -258,7 +254,6 @@ def read_fwf(filepath_or_buffer, parse_dates=False, dayfirst=False, date_parser=None, - date_conversion=None, nrows=None, iterator=False, chunksize=None, @@ -360,7 +355,6 @@ class TextParser(object): Comment out remainder of line parse_dates : boolean, default False date_parser : function, default None - date_conversion : list or dict, default None skiprows : list of integers Row numbers to skip skip_footer : int @@ -372,7 +366,7 @@ class TextParser(object): def __init__(self, f, delimiter=None, names=None, header=0, index_col=None, na_values=None, thousands=None, comment=None, parse_dates=False, - date_parser=None, date_conversion=None, dayfirst=False, + date_parser=None, dayfirst=False, chunksize=None, skiprows=None, skip_footer=0, converters=None, verbose=False, encoding=None): """ @@ -392,7 +386,6 @@ def __init__(self, f, delimiter=None, names=None, header=0, self.parse_dates = parse_dates self.date_parser = date_parser - self.date_conversion = date_conversion self.dayfirst = dayfirst if com.is_integer(skiprows): @@ -747,16 +740,10 @@ def get_chunk(self, rows=None): col = self.columns[col] data[col] = lib.map_infer(data[col], f) - if not isinstance(self.parse_dates, bool): - for x in self.parse_dates: - if isinstance(x, int) and x not in data: - x = self.orig_columns[x] - if x in self.index_col or x in self.index_name: - continue - data[x] = lib.try_parse_dates(data[x], parser=self.date_parser, - dayfirst=self.dayfirst) - - data, columns = self._process_date_conversion(data, self.columns) + columns = self.columns + if (self.parse_dates is not None and + not isinstance(self.parse_dates, bool)): + data, columns = self._process_date_conversion(data, columns) data = _convert_to_ndarrays(data, self.na_values, self.verbose) @@ -792,9 +779,6 @@ def _should_parse_dates(self, i): return i in to_parse or name in to_parse def _process_date_conversion(self, data_dict, columns): - if self.date_conversion is None: - return data_dict, columns - new_cols = [] new_data = {} @@ -804,26 +788,33 @@ def 
date_converter(*date_cols): dayfirst=self.dayfirst) else: try: - return self.date_parser(date_cols) + return self.date_parser(*date_cols) except: return lib.try_parse_dates(_concat_date_cols(date_cols), parser=self.date_parser, dayfirst=self.dayfirst) - if isinstance(self.date_conversion, list): + if isinstance(self.parse_dates, list): # list of column lists - for colspec in self.date_conversion: - new_name, col = _try_convert_dates(date_converter, colspec, - data_dict, columns) - if new_name in data_dict: - raise ValueError('Result date column already in dict %s' % - new_name) - new_data[new_name] = col - new_cols.append(new_name) - - elif isinstance(self.date_conversion, dict): + for colspec in self.parse_dates: + if np.isscalar(colspec): + if isinstance(colspec, int) and colspec not in data_dict: + colspec = self.orig_columns[colspec] + if colspec in self.index_col or colspec in self.index_name: + continue + data_dict[colspec] = date_converter(data_dict[colspec]) + else: + new_name, col = _try_convert_dates(date_converter, colspec, + data_dict, columns) + if new_name in data_dict: + raise ValueError('New date column already in dict %s' % + new_name) + new_data[new_name] = col + new_cols.append(new_name) + + elif isinstance(self.parse_dates, dict): # dict of new name to column list - for new_name, colspec in self.date_conversion.iteritems(): + for new_name, colspec in self.parse_dates.iteritems(): if new_name in data_dict: raise ValueError('Date column %s already in dict' % new_name) @@ -941,6 +932,8 @@ def _try_convert_dates(parser, colspec, data_dict, columns): return new_name, new_col def _concat_date_cols(date_cols): + if len(date_cols) == 1: + return date_cols[0] concat = lambda x: ' '.join(x) return np.array(np.apply_along_axis(concat, 0, np.vstack(date_cols)), dtype=object) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index a26c591b576ab..d169535655636 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -106,8 +106,8 @@ def func(*date_cols): df = read_table(StringIO(data), sep=',', header=None, date_parser=func, - date_conversion={'nominal' : [1, 2], - 'actual' : [1,3]}) + parse_dates={'nominal' : [1, 2], + 'actual' : [1,3]}) self.assert_('nominal' in df) self.assert_('actual' in df) from datetime import datetime @@ -123,7 +123,7 @@ def func(*date_cols): KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 """ df = read_table(StringIO(data), sep=',', header=None, - date_conversion=[[1, 2], [1,3]]) + parse_dates=[[1, 2], [1,3]]) self.assert_('X.2_X.3' in df) self.assert_('X.2_X.4' in df) from datetime import datetime From 9c01e7746d1c66c6e6e06bbc38e3350d41a4dbd3 Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 11 May 2012 13:48:17 -0400 Subject: [PATCH 043/114] TST: VB for multiple date columns --- vb_suite/parser.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/vb_suite/parser.py b/vb_suite/parser.py index 7c2754ca7da07..8c6abafa5b89a 100644 --- a/vb_suite/parser.py +++ b/vb_suite/parser.py @@ -50,3 +50,42 @@ setup, cleanup="os.remove('test.csv')", start_date=datetime(2012, 5, 7)) + +setup = common_setup + """ +from pandas import read_table +from cStringIO import StringIO +import os +N = 10000 +K = 8 +data = '''\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 
+KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +''' +data = data * 2000 +""" +cmd = ("read_table(StringIO(data), sep=',', header=None, " + "parse_dates=[[1,2], [1,3]])") +sdate = datetime(2012, 5, 7) +read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) + +setup = common_setup + """ +from pandas import read_table +from cStringIO import StringIO +import os +N = 10000 +K = 8 +data = '''\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +''' +data = data * 2000 +""" +cmd = "read_table(StringIO(data), sep=',', header=None)" +sdate = datetime(2012, 5, 7) +read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate) From 1febe66f800db6e735eedecb488bb5626a269a9b Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 11 May 2012 15:53:14 -0400 Subject: [PATCH 044/114] A few related bug fixes --- pandas/io/parsers.py | 64 ++++++++++++++++++--------------- pandas/io/tests/test_parsers.py | 13 +++++++ vb_suite/parser.py | 12 +++---- 3 files changed, 55 insertions(+), 34 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 985da7b29a167..b8cc2f0d192a3 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -676,7 +676,6 @@ def get_chunk(self, rows=None): zipped_content = list(lib.to_object_array(content).T) - # no index column specified, so infer that's what is wanted if self.index_col is not None: if np.isscalar(self.index_col): index = zipped_content.pop(self.index_col) @@ -690,9 +689,8 @@ def get_chunk(self, rows=None): zipped_content.pop(i) if np.isscalar(self.index_col): - if self._should_parse_dates(0): - index = lib.try_parse_dates(index, parser=self.date_parser, - dayfirst=self.dayfirst) + if self._should_parse_dates(self.index_col): + index = self._conv_date(index) index, na_count = _convert_types(index, self.na_values) index = Index(index, name=self.index_name) if self.verbose and na_count: @@ -700,9 +698,8 @@ def get_chunk(self, rows=None): else: arrays = [] for i, arr in enumerate(index): - if self._should_parse_dates(i): - arr = lib.try_parse_dates(arr, parser=self.date_parser, - dayfirst=self.dayfirst) + if self._should_parse_dates(self.index_col[i]): + arr = self._conv_date(arr) arr, _ = _convert_types(arr, self.na_values) arrays.append(arr) index = MultiIndex.from_arrays(arrays, names=self.index_name) @@ -741,9 +738,8 @@ def get_chunk(self, rows=None): data[col] = lib.map_infer(data[col], f) columns = self.columns - if (self.parse_dates is not None and - not isinstance(self.parse_dates, bool)): - data, columns = self._process_date_conversion(data, columns) + if self.parse_dates is not None: + data, columns = self._process_date_conversion(data) data = _convert_to_ndarrays(data, self.na_values, self.verbose) @@ -778,21 +774,25 @@ def _should_parse_dates(self, i): name = self.index_name[i] return i in to_parse or name in to_parse - def _process_date_conversion(self, data_dict, columns): + def _conv_date(self, *date_cols): + if self.date_parser is None: + return lib.try_parse_dates(_concat_date_cols(date_cols), + dayfirst=self.dayfirst) + else: + try: + return self.date_parser(*date_cols) + 
except: + return lib.try_parse_dates(_concat_date_cols(date_cols), + parser=self.date_parser, + dayfirst=self.dayfirst) + + def _process_date_conversion(self, data_dict): new_cols = [] new_data = {} + columns = self.columns - def date_converter(*date_cols): - if self.date_parser is None: - return lib.try_parse_dates(_concat_date_cols(date_cols), - dayfirst=self.dayfirst) - else: - try: - return self.date_parser(*date_cols) - except: - return lib.try_parse_dates(_concat_date_cols(date_cols), - parser=self.date_parser, - dayfirst=self.dayfirst) + if self.parse_dates is None or isinstance(self.parse_dates, bool): + return data_dict, columns if isinstance(self.parse_dates, list): # list of column lists @@ -800,12 +800,12 @@ def date_converter(*date_cols): if np.isscalar(colspec): if isinstance(colspec, int) and colspec not in data_dict: colspec = self.orig_columns[colspec] - if colspec in self.index_col or colspec in self.index_name: + if self._isindex(colspec): continue - data_dict[colspec] = date_converter(data_dict[colspec]) + data_dict[colspec] = self._conv_date(data_dict[colspec]) else: - new_name, col = _try_convert_dates(date_converter, colspec, - data_dict, columns) + new_name, col = _try_convert_dates(self._conv_date, colspec, + data_dict, self.orig_columns) if new_name in data_dict: raise ValueError('New date column already in dict %s' % new_name) @@ -819,8 +819,8 @@ def date_converter(*date_cols): raise ValueError('Date column %s already in dict' % new_name) - _, col = _try_convert_dates(date_converter, colspec, data_dict, - columns) + _, col = _try_convert_dates(self._conv_date, colspec, data_dict, + self.orig_columns) new_data[new_name] = col new_cols.append(new_name) @@ -828,6 +828,14 @@ def date_converter(*date_cols): new_cols.extend(columns) return data_dict, new_cols + def _isindex(self, colspec): + return (colspec == self.index_col or + (isinstance(self.index_col, list) and + colspec in self.index_col) or + (colspec == self.index_name or + (isinstance(self.index_name, list) and + colspec in self.index_name))) + def _get_lines(self, rows=None): source = self.data lines = self.buf diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index d169535655636..e8589757c54d9 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -130,6 +130,19 @@ def func(*date_cols): d = datetime(1999, 1, 27, 19, 0) self.assert_(df.ix[0, 'X.2_X.3'] == d) + data = '''\ +KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +''' + df = read_table(StringIO(data), sep=',', header=None, + parse_dates=[1], index_col=1) + from datetime import datetime + d = datetime(1999, 1, 27, 19, 0) + self.assert_(df.index[0] == d) + def test_malformed(self): # all data = """ignore diff --git a/vb_suite/parser.py b/vb_suite/parser.py index 8c6abafa5b89a..946e1327578c0 100644 --- a/vb_suite/parser.py +++ b/vb_suite/parser.py @@ -78,14 +78,14 @@ N = 10000 K = 8 data = '''\ -KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 -KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 -KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 -KORD,19990127, 21:00:00, 21:18:00, -0.9900, 
2.0100, 3.6000, 0.0000, 270.0000 -KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 ''' data = data * 2000 """ -cmd = "read_table(StringIO(data), sep=',', header=None)" +cmd = "read_table(StringIO(data), sep=',', header=None, parse_dates=[1])" sdate = datetime(2012, 5, 7) read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate) From 3fdf18ae777f0e44d3728125787f449e7aaf4156 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 10:49:52 -0400 Subject: [PATCH 045/114] TST: test with headers --- pandas/io/parsers.py | 17 +++++++++++------ pandas/io/tests/test_parsers.py | 23 ++++++++++++++++++----- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b8cc2f0d192a3..aeb36963c69c8 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -49,8 +49,12 @@ class DateConversionError(Exception): na_values : list-like or dict, default None Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values -parse_dates : boolean or list of column numbers/name, default False - Attempt to parse dates in the indicated columns +parse_dates : boolean, list of ints or names, list of lists, or dict + True -> try parsing all columns + [1, 2, 3] -> try parsing columns 1, 2, 3 + [[1, 3]] -> combine columns 1 and 3 and parse as date (for dates split + across multiple columns), and munge column names + {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo' date_parser : function Function to use for converting dates to strings. 
Defaults to dateutil.parser @@ -936,15 +940,16 @@ def _try_convert_dates(parser, colspec, data_dict, columns): try: new_col = parser(*to_parse) except DateConversionError: - new_col = _concat_date_cols(to_parse) + new_col = parser(_concat_date_cols(to_parse)) return new_name, new_col def _concat_date_cols(date_cols): if len(date_cols) == 1: return date_cols[0] - concat = lambda x: ' '.join(x) - return np.array(np.apply_along_axis(concat, 0, np.vstack(date_cols)), - dtype=object) + + # stripped = [map(str.strip, x) for x in date_cols] + return np.array([' '.join(x) for x in zip(*date_cols)], dtype=object) + class FixedWidthReader(object): """ diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index e8589757c54d9..3960f8523a8d7 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -104,7 +104,7 @@ def test_multiple_date_col(self): def func(*date_cols): return lib.try_parse_dates(parsers._concat_date_cols(date_cols)) - df = read_table(StringIO(data), sep=',', header=None, + df = read_csv(StringIO(data), header=None, date_parser=func, parse_dates={'nominal' : [1, 2], 'actual' : [1,3]}) @@ -122,7 +122,7 @@ def func(*date_cols): KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 """ - df = read_table(StringIO(data), sep=',', header=None, + df = read_csv(StringIO(data), header=None, parse_dates=[[1, 2], [1,3]]) self.assert_('X.2_X.3' in df) self.assert_('X.2_X.4' in df) @@ -137,12 +137,25 @@ def func(*date_cols): KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 ''' - df = read_table(StringIO(data), sep=',', header=None, - parse_dates=[1], index_col=1) + df = read_csv(StringIO(data), sep=',', header=None, + parse_dates=[1], index_col=1) from datetime import datetime d = datetime(1999, 1, 27, 19, 0) self.assert_(df.index[0] == d) + def test_multiple_date_cols_with_header(self): + data = """\ +ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" + + df = read_csv(StringIO(data), parse_dates={'nominal': [1, 2]}) + self.assert_(not isinstance(df.nominal[0], basestring)) + def test_malformed(self): # all data = """ignore @@ -429,7 +442,7 @@ def test_excel_stop_iterator(self): parsed = excel_data.parse('Sheet1') expected = DataFrame([['aaaa','bbbbb']], columns=['Test', 'Test1']) assert_frame_equal(parsed, expected) - + def test_excel_cell_error_na(self): try: import xlrd From a89e7b994dc04b1cf02f995b991d6f96e290c68d Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 11 May 2012 19:54:03 -0400 Subject: [PATCH 046/114] ENH: maybe upcast masked arrays passed to DataFrame constructor --- pandas/core/frame.py | 6 +++- pandas/tests/test_frame.py | 67 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2694e9f3e484a..3e36162f544e2 100644 --- a/pandas/core/frame.py +++ 
b/pandas/core/frame.py @@ -304,7 +304,11 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, elif isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) datacopy = ma.copy(data) - datacopy[mask] = np.nan + if issubclass(data.dtype.type, np.datetime64): + datacopy[mask] = lib.NaT + else: + datacopy = com._maybe_upcast(datacopy) + datacopy[mask] = np.nan mgr = self._init_ndarray(datacopy, index, columns, dtype=dtype, copy=copy) elif isinstance(data, np.ndarray): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 86a64bdfc4002..b23ba46b44833 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1601,6 +1601,73 @@ def test_constructor_maskedarray(self): frame = DataFrame(ma.masked_all((3, 0))) self.assert_(len(frame.columns) == 0) + def test_constructor_maskedarray_nonfloat(self): + # masked int promoted to float + mat = ma.masked_all((2, 3), dtype=int) + # 2-D input + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) + + self.assertEqual(len(frame.index), 2) + self.assertEqual(len(frame.columns), 3) + self.assertTrue(np.all(~np.asarray(frame == frame))) + + # cast type + frame = DataFrame(mat, columns=['A', 'B', 'C'], + index=[1, 2], dtype=float) + self.assert_(frame.values.dtype == np.float64) + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0,0] = 1 + mat2[1,2] = 2 + frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) + self.assertEqual(1, frame['A'][1]) + self.assertEqual(2, frame['C'][2]) + + # masked np.datetime64 stays (use lib.NaT as null) + mat = ma.masked_all((2, 3), dtype=np.datetime64) + # 2-D input + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) + + self.assertEqual(len(frame.index), 2) + self.assertEqual(len(frame.columns), 3) + self.assertTrue(isnull(frame).values.all()) + + # cast type + frame = DataFrame(mat, columns=['A', 'B', 'C'], + index=[1, 2], dtype=int) + self.assert_(frame.values.dtype == int) + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0,0] = 1 + mat2[1,2] = 2 + frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) + self.assertEqual(1, frame['A'].view('i8')[1]) + self.assertEqual(2, frame['C'].view('i8')[2]) + + # masked bool promoted to object + mat = ma.masked_all((2, 3), dtype=bool) + # 2-D input + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) + + self.assertEqual(len(frame.index), 2) + self.assertEqual(len(frame.columns), 3) + self.assertTrue(np.all(~np.asarray(frame == frame))) + + # cast type + frame = DataFrame(mat, columns=['A', 'B', 'C'], + index=[1, 2], dtype=object) + self.assert_(frame.values.dtype == object) + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0,0] = True + mat2[1,2] = False + frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) + self.assertEqual(True, frame['A'][1]) + self.assertEqual(False, frame['C'][2]) + def test_constructor_corner(self): df = DataFrame(index=[]) self.assertEqual(df.values.shape, (0, 0)) From c9af5c500cac4c7cd1c3d5aa2b95cc1472d83d96 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Tue, 8 May 2012 10:40:53 +0200 Subject: [PATCH 047/114] ENH: Add support for converting DataFrames to R data.frames and matrices, close #350 --- pandas/rpy/common.py | 109 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index afd1f57306b54..56c56136b488a 100644 --- a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -12,7 +12,8 @@ from rpy2.robjects import r 
import rpy2.robjects as robj -__all__ = ['convert_robj', 'load_data'] +__all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe', + 'convert_to_r_matrix'] def load_data(name, package=None, convert=True): if package: @@ -173,6 +174,81 @@ def convert_robj(obj, use_pandas=True): raise Exception('Do not know what to do with %s object' % type(obj)) +VECTOR_TYPES = {np.float64: robj.FloatVector, + np.float32: robj.FloatVector, + np.float: robj.FloatVector, + np.int: robj.IntVector, + np.int32: robj.IntVector, + np.int64: robj.IntVector, + np.object_: robj.StrVector, + np.str: robj.StrVector} + +def convert_to_r_dataframe(df, strings_as_factors=False): + """ + Convert a pandas DataFrame to a R data.frame. + + Parameters + ---------- + df: The DataFrame being converted + strings_as_factors: Whether to turn strings into R factors (default: False) + + Returns + ------- + A R data.frame + + """ + + import rpy2.rlike.container as rlc + + columns = rlc.OrdDict() + + #FIXME: This doesn't handle MultiIndex + + for column in df: + value = df[column] + value_type = value.dtype.type + value = [item if pn.notnull(item) else robj.NA_Logical + for item in value] + value = VECTOR_TYPES[value_type](value) + + if not strings_as_factors: + I = robj.baseenv.get("I") + value = I(value) + + columns[column] = value + + r_dataframe = robj.DataFrame(columns) + + del columns + + r_dataframe.rownames = robj.StrVector(df.index) + + return r_dataframe + + +def convert_to_r_matrix(df, strings_as_factors=False): + + """ + Convert a pandas DataFrame to a R matrix. + + Parameters + ---------- + df: The DataFrame being converted + strings_as_factors: Whether to turn strings into R factors (default: False) + + Returns + ------- + A R matrix + + """ + + r_dataframe = convert_to_r_dataframe(df, strings_as_factors) + as_matrix = robj.baseenv.get("as.matrix") + r_matrix = as_matrix(r_dataframe) + + return r_matrix + + def test_convert_list(): obj = r('list(a=1, b=2, c=3)') @@ -213,6 +289,37 @@ def test_convert_matrix(): assert np.array_equal(converted.index, ['a', 'b', 'c']) assert np.array_equal(converted.columns, ['one', 'two', 'three']) +def test_convert_r_dataframe(): + + seriesd = _test.getSeriesData() + frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A']) + + r_dataframe = convert_to_r_dataframe(frame) + + assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index) + assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns) + + for column in r_dataframe.colnames: + coldata = r_dataframe.rx2(column) + original_data = frame[column] + assert np.array_equal(convert_robj(coldata), original_data) + +def test_convert_r_matrix(): + + seriesd = _test.getSeriesData() + frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A']) + + r_dataframe = convert_to_r_matrix(frame) + + assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index) + assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns) + + for column in r_dataframe.colnames: + coldata = r_dataframe.rx2(column) + original_data = frame[column] + assert np.array_equal(convert_robj(coldata), original_data) + + if __name__ == '__main__': pass From d17f1d53f0f7ba27591ea999e7b9f4b9bf051217 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Tue, 8 May 2012 10:44:32 +0200 Subject: [PATCH 048/114] BUG: Properly handle the case of matrices --- pandas/rpy/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index 56c56136b488a..4d3620536f2cd 100644 --- 
a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -315,7 +315,7 @@ def test_convert_r_matrix(): assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns) for column in r_dataframe.colnames: - coldata = r_dataframe.rx2(column) + coldata = r_dataframe.rx(True, column) original_data = frame[column] assert np.array_equal(convert_robj(coldata), original_data) From ea7f4e1bbdfe8c4a7e01226e68da9e83ce67065c Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 11:05:22 -0400 Subject: [PATCH 049/114] RLS: release notes --- RELEASE.rst | 4 ++++ pandas/io/tests/test_parsers.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/RELEASE.rst b/RELEASE.rst index a82e511c12cb6..607be2e989141 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -41,6 +41,9 @@ pandas 0.8.0 - Add ``match`` function to API (#502) - Add Cython-optimized first, last, min, max, prod functions to GroupBy (#994, #1043) + - Dates can be split across multiple columns (#1227, #1186) + - Add experimental support for converting pandas DataFrame to R data.frame + via rpy2 (#350, #1212) **Improvements to existing features** @@ -53,6 +56,7 @@ pandas 0.8.0 - Can pass arrays in addition to column names to DataFrame.set_index (#402) - Improve the speed of "square" reindexing of homogeneous DataFrame objects by significant margin (#836) + - Handle more dtypes when passed MaskedArrays in DataFrame constructor (#406) **API Changes** diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 3960f8523a8d7..5fccc5a39c47a 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -156,6 +156,10 @@ def test_multiple_date_cols_with_header(self): df = read_csv(StringIO(data), parse_dates={'nominal': [1, 2]}) self.assert_(not isinstance(df.nominal[0], basestring)) + def test_multiple_skts_example(self): + data = "year, month, a, b\n 2001, 01, 0.0, 10.\n 2001, 02, 1.1, 11." 
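+        # Placeholder body; below is a commented, assumed sketch of the
+        # intended usage of the new dict form of parse_dates (the result
+        # column name 'ym' and the column numbers are hypothetical):
+        #   df = read_csv(StringIO(data), parse_dates={'ym': [0, 1]})
+        #   self.assert_('ym' in df)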
+ pass + def test_malformed(self): # all data = """ignore From 4c1eb1b2162793fa28b9724758a310af802e7ca9 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 11:22:54 -0400 Subject: [PATCH 050/114] ENH: optimize join/merge on integer keys, close #682 --- RELEASE.rst | 1 + pandas/src/hashtable.pyx | 5 +++-- pandas/tools/merge.py | 41 ++++++++++++++++++---------------------- vb_suite/join_merge.py | 6 ++++++ 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 607be2e989141..93575fe2910bd 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -57,6 +57,7 @@ pandas 0.8.0 - Improve the speed of "square" reindexing of homogeneous DataFrame objects by significant margin (#836) - Handle more dtypes when passed MaskedArrays in DataFrame constructor (#406) + - Improved performance of join operations on integer keys (#682) **API Changes** diff --git a/pandas/src/hashtable.pyx b/pandas/src/hashtable.pyx index d6a5b3a442c7e..fea622449b47c 100644 --- a/pandas/src/hashtable.pyx +++ b/pandas/src/hashtable.pyx @@ -823,9 +823,10 @@ cdef class Int64Factorizer: def get_count(self): return self.count - def factorize(self, ndarray[int64_t] values, sort=False): + def factorize(self, ndarray[int64_t] values, sort=False, + na_sentinel=-1): labels, counts = self.table.get_labels(values, self.uniques, - self.count, -1) + self.count, na_sentinel) # sort on if sort: diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index c26c325b21437..d6f65667929dd 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -304,7 +304,7 @@ def _get_group_keys(self): group_sizes = [] for lk, rk in zip(left_keys, right_keys): - llab, rlab, count = _factorize_objects(lk, rk, sort=self.sort) + llab, rlab, count = _factorize_keys(lk, rk, sort=self.sort) left_labels.append(llab) right_labels.append(rlab) @@ -321,7 +321,7 @@ def _get_group_keys(self): raise Exception('Combinatorial explosion! 
(boom)') left_group_key, right_group_key, max_groups = \ - _factorize_int64(left_group_key, right_group_key, + _factorize_keys(left_group_key, right_group_key, sort=self.sort) return left_group_key, right_group_key, max_groups @@ -329,7 +329,7 @@ def _get_multiindex_indexer(join_keys, index, sort=False): shape = [] labels = [] for level, key in zip(index.levels, join_keys): - llab, rlab, count = _factorize_objects(level, key, sort=False) + llab, rlab, count = _factorize_keys(level, key, sort=False) labels.append(rlab) shape.append(count) @@ -337,8 +337,8 @@ def _get_multiindex_indexer(join_keys, index, sort=False): right_group_key = get_group_index(index.labels, shape) left_group_key, right_group_key, max_groups = \ - _factorize_int64(left_group_key, right_group_key, - sort=False) + _factorize_keys(left_group_key, right_group_key, + sort=False) left_indexer, right_indexer = \ lib.left_outer_join(com._ensure_int64(left_group_key), @@ -348,7 +348,7 @@ def _get_multiindex_indexer(join_keys, index, sort=False): return left_indexer, right_indexer def _get_single_indexer(join_key, index, sort=False): - left_key, right_key, count = _factorize_objects(join_key, index, sort=sort) + left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) left_indexer, right_indexer = \ lib.left_outer_join(com._ensure_int64(left_key), @@ -394,26 +394,21 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): 'outer' : lib.full_outer_join, } -def _factorize_int64(left_index, right_index, sort=True): - rizer = lib.Int64Factorizer(max(len(left_index), len(right_index))) - # 32-bit compatibility - left_index = com._ensure_int64(left_index) - right_index = com._ensure_int64(right_index) - - llab, _ = rizer.factorize(left_index) - rlab, _ = rizer.factorize(right_index) - - if sort: - llab, rlab = _sort_labels(np.array(rizer.uniques), llab, rlab) - - return llab, rlab, rizer.get_count() +def _factorize_keys(lk, rk, sort=True): + if com.is_integer_dtype(lk) and com.is_integer_dtype(rk): + klass = lib.Int64Factorizer + lk = com._ensure_int64(lk) + rk = com._ensure_int64(rk) + else: + klass = lib.Factorizer + lk = com._ensure_object(lk) + rk = com._ensure_object(rk) -def _factorize_objects(left_index, right_index, sort=True): - rizer = lib.Factorizer(max(len(left_index), len(right_index))) + rizer = klass(max(len(lk), len(rk))) - llab, _ = rizer.factorize(left_index.astype('O')) - rlab, _ = rizer.factorize(right_index.astype('O')) + llab, _ = rizer.factorize(lk) + rlab, _ = rizer.factorize(rk) count = rizer.get_count() diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py index 002761a00adf1..657ca398f01bb 100644 --- a/vb_suite/join_merge.py +++ b/vb_suite/join_merge.py @@ -66,6 +66,12 @@ name='join_dataframe_index_multi', start_date=datetime(2011, 10, 20)) +#---------------------------------------------------------------------- +# Joins on integer keys + +join_dataframe_integer_key = Benchmark("merge(df, df2, on='key')", setup, + start_date=datetime(2011, 10, 20)) + #---------------------------------------------------------------------- # DataFrame joins on index From 8572d54ba60faaeedc886416a6755d3f52b8eae3 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 11:24:39 -0400 Subject: [PATCH 051/114] RLS: release notes for #1081 --- RELEASE.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE.rst b/RELEASE.rst index 93575fe2910bd..5bbcb54601d30 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -85,6 +85,8 @@ pandas 0.8.0 - Fix formatting of MultiIndex on 
Series/DataFrame when index name coincides with label (#1217) - Handle Excel 2003 #N/A as NaN from xlrd (#1213, #1225) + - Fix timestamp locale-related deserialization issues with HDFStore by moving + to datetime64 representation (#1081, #809) pandas 0.7.3 ============ From 8ecb31bcda10c94e9d5d9a243c7462d3d4fdf07f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 11:39:40 -0400 Subject: [PATCH 052/114] ENH: efficiently box datetime64 -> Timestamp inside Series.__getitem__. close #1058 --- pandas/core/frame.py | 8 +++----- pandas/src/engines.pyx | 2 ++ pandas/tseries/tests/test_timeseries.py | 6 ++++++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3e36162f544e2..6048a6b678d3b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2680,17 +2680,15 @@ def fillna(self, value=None, method='pad', axis=0, inplace=False, # Float type values if len(self.columns) == 0: return self - if np.isscalar(value): - new_data = self._data.fillna(value, inplace=inplace) - elif isinstance(value, dict): + if isinstance(value, dict): result = self if inplace else self.copy() for k, v in value.iteritems(): if k not in result: continue result[k].fillna(v, inplace=True) return result - else: # pragma: no cover - raise TypeError('Invalid fill value type: %s' % type(value)) + else: + new_data = self._data.fillna(value, inplace=inplace) if inplace: self._data = new_data diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index df92cce1c3efa..809de9e1015ad 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -79,6 +79,8 @@ cdef class IndexEngine: if PySlice_Check(loc) or cnp.PyArray_Check(loc): return arr[loc] else: + if arr.descr.type_num == NPY_DATETIME: + return Timestamp(util.get_value_at(arr, loc)) return util.get_value_at(arr, loc) cpdef set_value(self, ndarray arr, object key, object value): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 2628386668082..c6f5c39cdda7c 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -132,6 +132,12 @@ def test_getitem_median_slice_bug(self): expected = s[indexer[0]] assert_series_equal(result, expected) + def test_series_box_timestamp(self): + rng = date_range('20090415', '20090519', freq='B') + s = Series(rng) + + self.assert_(isinstance(s[5], Timestamp)) + def test_series_ctor_plus_datetimeindex(self): rng = date_range('20090415', '20090519', freq='B') data = dict((k, 1) for k in rng) From 4b56332fb6d2649be6c3f5da308034f96f2cc75a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 14:22:03 -0400 Subject: [PATCH 053/114] BLD: add modified numpy Cython header --- pandas/src/datetime.pxd | 2 + pandas/src/numpy.pxd | 980 ++++++++++++++++++++++++++++++++++++++++ pandas/tseries/api.py | 1 + 3 files changed, 983 insertions(+) create mode 100644 pandas/src/numpy.pxd diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index 213f29c5e2605..6ae001c2f0010 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -49,6 +49,8 @@ cdef extern from "numpy/ndarrayobject.h": NPY_SAME_KIND_CASTING NPY_UNSAFE_CASTING +cdef extern from "numpy/ndarraytypes + cdef extern from "numpy_helper.h": npy_datetime unbox_datetime64_scalar(object o) diff --git a/pandas/src/numpy.pxd b/pandas/src/numpy.pxd new file mode 100644 index 0000000000000..45c2fc184a911 --- /dev/null +++ b/pandas/src/numpy.pxd @@ -0,0 +1,980 @@ +# NumPy static imports for Cython +# 
+# If any of the PyArray_* functions are called, import_array must be +# called first. +# +# This also defines backwards-compatability buffer acquisition +# code for use in Python 2.x (or Python <= 2.5 when NumPy starts +# implementing PEP-3118 directly). +# +# Because of laziness, the format string of the buffer is statically +# allocated. Increase the size if this is not enough, or submit a +# patch to do this properly. +# +# Author: Dag Sverre Seljebotn +# + +DEF _buffer_format_string_len = 255 + +cimport cpython.buffer as pybuf +from cpython.ref cimport Py_INCREF, Py_XDECREF +from cpython.object cimport PyObject +cimport libc.stdlib as stdlib +cimport libc.stdio as stdio + +cdef extern from "Python.h": + ctypedef int Py_intptr_t + +cdef extern from "numpy/arrayobject.h": + ctypedef Py_intptr_t npy_intp + ctypedef size_t npy_uintp + + cdef enum NPY_TYPES: + NPY_BOOL + NPY_BYTE + NPY_UBYTE + NPY_SHORT + NPY_USHORT + NPY_INT + NPY_UINT + NPY_LONG + NPY_ULONG + NPY_LONGLONG + NPY_ULONGLONG + NPY_FLOAT + NPY_DOUBLE + NPY_LONGDOUBLE + NPY_CFLOAT + NPY_CDOUBLE + NPY_CLONGDOUBLE + NPY_OBJECT + NPY_STRING + NPY_UNICODE + NPY_VOID + NPY_NTYPES + NPY_NOTYPE + + NPY_INT8 + NPY_INT16 + NPY_INT32 + NPY_INT64 + NPY_INT128 + NPY_INT256 + NPY_UINT8 + NPY_UINT16 + NPY_UINT32 + NPY_UINT64 + NPY_UINT128 + NPY_UINT256 + NPY_FLOAT16 + NPY_FLOAT32 + NPY_FLOAT64 + NPY_FLOAT80 + NPY_FLOAT96 + NPY_FLOAT128 + NPY_FLOAT256 + NPY_COMPLEX32 + NPY_COMPLEX64 + NPY_COMPLEX128 + NPY_COMPLEX160 + NPY_COMPLEX192 + NPY_COMPLEX256 + NPY_COMPLEX512 + + NPY_DATETIME + + NPY_INTP + + ctypedef enum NPY_ORDER: + NPY_ANYORDER + NPY_CORDER + NPY_FORTRANORDER + + ctypedef enum NPY_CLIPMODE: + NPY_CLIP + NPY_WRAP + NPY_RAISE + + ctypedef enum NPY_SCALARKIND: + NPY_NOSCALAR, + NPY_BOOL_SCALAR, + NPY_INTPOS_SCALAR, + NPY_INTNEG_SCALAR, + NPY_FLOAT_SCALAR, + NPY_COMPLEX_SCALAR, + NPY_OBJECT_SCALAR + + ctypedef enum NPY_SORTKIND: + NPY_QUICKSORT + NPY_HEAPSORT + NPY_MERGESORT + + ctypedef enum NPY_SEARCHSIDE: + NPY_SEARCHLEFT + NPY_SEARCHRIGHT + + enum: + NPY_C_CONTIGUOUS + NPY_F_CONTIGUOUS + NPY_CONTIGUOUS + NPY_FORTRAN + NPY_OWNDATA + NPY_FORCECAST + NPY_ENSURECOPY + NPY_ENSUREARRAY + NPY_ELEMENTSTRIDES + NPY_ALIGNED + NPY_NOTSWAPPED + NPY_WRITEABLE + NPY_UPDATEIFCOPY + NPY_ARR_HAS_DESCR + + NPY_BEHAVED + NPY_BEHAVED_NS + NPY_CARRAY + NPY_CARRAY_RO + NPY_FARRAY + NPY_FARRAY_RO + NPY_DEFAULT + + NPY_IN_ARRAY + NPY_OUT_ARRAY + NPY_INOUT_ARRAY + NPY_IN_FARRAY + NPY_OUT_FARRAY + NPY_INOUT_FARRAY + + NPY_UPDATE_ALL + + cdef enum: + NPY_MAXDIMS + + npy_intp NPY_MAX_ELSIZE + + ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *) + + ctypedef class numpy.dtype [object PyArray_Descr]: + # Use PyDataType_* macros when possible, however there are no macros + # for accessing some of the fields, so some are defined. Please + # ask on cython-dev if you need more. + cdef int type_num + cdef int itemsize "elsize" + cdef char byteorder + cdef object fields + cdef tuple names + + ctypedef extern class numpy.flatiter [object PyArrayIterObject]: + # Use through macros + pass + + ctypedef extern class numpy.broadcast [object PyArrayMultiIterObject]: + # Use through macros + pass + + ctypedef struct PyArrayObject: + # For use in situations where ndarray can't replace PyArrayObject*, + # like PyArrayObject**. + pass + + ctypedef class numpy.ndarray [object PyArrayObject]: + cdef __cythonbufferdefaults__ = {"mode": "strided"} + + cdef: + # Only taking a few of the most commonly used and stable fields. 
+ # One should use PyArray_* macros instead to access the C fields. + char *data + int ndim "nd" + npy_intp *shape "dimensions" + npy_intp *strides + dtype descr + PyObject* base + + # Note: This syntax (function definition in pxd files) is an + # experimental exception made for __getbuffer__ and __releasebuffer__ + # -- the details of this may change. + def __getbuffer__(ndarray self, Py_buffer* info, int flags): + # This implementation of getbuffer is geared towards Cython + # requirements, and does not yet fullfill the PEP. + # In particular strided access is always provided regardless + # of flags + + if info == NULL: return + + cdef int copy_shape, i, ndim + cdef int endian_detector = 1 + cdef bint little_endian = ((&endian_detector)[0] != 0) + + ndim = PyArray_NDIM(self) + + if sizeof(npy_intp) != sizeof(Py_ssize_t): + copy_shape = 1 + else: + copy_shape = 0 + + if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + raise ValueError(u"ndarray is not C contiguous") + + if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + raise ValueError(u"ndarray is not Fortran contiguous") + + info.buf = PyArray_DATA(self) + info.ndim = ndim + if copy_shape: + # Allocate new buffer for strides and shape info. + # This is allocated as one block, strides first. + info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) + info.shape = info.strides + ndim + for i in range(ndim): + info.strides[i] = PyArray_STRIDES(self)[i] + info.shape[i] = PyArray_DIMS(self)[i] + else: + info.strides = PyArray_STRIDES(self) + info.shape = PyArray_DIMS(self) + info.suboffsets = NULL + info.itemsize = PyArray_ITEMSIZE(self) + info.readonly = not PyArray_ISWRITEABLE(self) + + cdef int t + cdef char* f = NULL + cdef dtype descr = self.descr + cdef list stack + cdef int offset + + cdef bint hasfields = PyDataType_HASFIELDS(descr) + + if not hasfields and not copy_shape: + # do not call releasebuffer + info.obj = None + else: + # need to call releasebuffer + info.obj = self + + if not hasfields: + t = descr.type_num + if ((descr.byteorder == '>' and little_endian) or + (descr.byteorder == '<' and not little_endian)): + raise ValueError(u"Non-native byte order not supported") + if t == NPY_BYTE: f = "b" + elif t == NPY_UBYTE: f = "B" + elif t == NPY_SHORT: f = "h" + elif t == NPY_USHORT: f = "H" + elif t == NPY_INT: f = "i" + elif t == NPY_UINT: f = "I" + elif t == NPY_LONG: f = "l" + elif t == NPY_ULONG: f = "L" + elif t == NPY_LONGLONG: f = "q" + elif t == NPY_ULONGLONG: f = "Q" + elif t == NPY_FLOAT: f = "f" + elif t == NPY_DOUBLE: f = "d" + elif t == NPY_LONGDOUBLE: f = "g" + elif t == NPY_CFLOAT: f = "Zf" + elif t == NPY_CDOUBLE: f = "Zd" + elif t == NPY_CLONGDOUBLE: f = "Zg" + elif t == NPY_OBJECT: f = "O" + else: + raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + info.format = f + return + else: + info.format = stdlib.malloc(_buffer_format_string_len) + info.format[0] = '^' # Native data types, manual alignment + offset = 0 + f = _util_dtypestring(descr, info.format + 1, + info.format + _buffer_format_string_len, + &offset) + f[0] = 0 # Terminate format string + + def __releasebuffer__(ndarray self, Py_buffer* info): + if PyArray_HASFIELDS(self): + stdlib.free(info.format) + if sizeof(npy_intp) != sizeof(Py_ssize_t): + stdlib.free(info.strides) + # info.shape was stored after info.strides in the same block + + + ctypedef signed char npy_bool + + ctypedef signed char 
npy_byte + ctypedef signed short npy_short + ctypedef signed int npy_int + ctypedef signed long npy_long + ctypedef signed long long npy_longlong + + ctypedef unsigned char npy_ubyte + ctypedef unsigned short npy_ushort + ctypedef unsigned int npy_uint + ctypedef unsigned long npy_ulong + ctypedef unsigned long long npy_ulonglong + + ctypedef float npy_float + ctypedef double npy_double + ctypedef long double npy_longdouble + + ctypedef signed char npy_int8 + ctypedef signed short npy_int16 + ctypedef signed int npy_int32 + ctypedef signed long long npy_int64 + ctypedef signed long long npy_int96 + ctypedef signed long long npy_int128 + + ctypedef unsigned char npy_uint8 + ctypedef unsigned short npy_uint16 + ctypedef unsigned int npy_uint32 + ctypedef unsigned long long npy_uint64 + ctypedef unsigned long long npy_uint96 + ctypedef unsigned long long npy_uint128 + + ctypedef float npy_float32 + ctypedef double npy_float64 + ctypedef long double npy_float80 + ctypedef long double npy_float96 + ctypedef long double npy_float128 + + ctypedef struct npy_cfloat: + double real + double imag + + ctypedef struct npy_cdouble: + double real + double imag + + ctypedef struct npy_clongdouble: + double real + double imag + + ctypedef struct npy_complex64: + double real + double imag + + ctypedef struct npy_complex128: + double real + double imag + + ctypedef struct npy_complex160: + double real + double imag + + ctypedef struct npy_complex192: + double real + double imag + + ctypedef struct npy_complex256: + double real + double imag + + ctypedef struct PyArray_Dims: + npy_intp *ptr + int len + + void import_array() + + # + # Macros from ndarrayobject.h + # + bint PyArray_CHKFLAGS(ndarray m, int flags) + bint PyArray_ISCONTIGUOUS(ndarray m) + bint PyArray_ISWRITEABLE(ndarray m) + bint PyArray_ISALIGNED(ndarray m) + + int PyArray_NDIM(ndarray) + bint PyArray_ISONESEGMENT(ndarray) + bint PyArray_ISFORTRAN(ndarray) + int PyArray_FORTRANIF(ndarray) + + void* PyArray_DATA(ndarray) + char* PyArray_BYTES(ndarray) + npy_intp* PyArray_DIMS(ndarray) + npy_intp* PyArray_STRIDES(ndarray) + npy_intp PyArray_DIM(ndarray, size_t) + npy_intp PyArray_STRIDE(ndarray, size_t) + + # object PyArray_BASE(ndarray) wrong refcount semantics + # dtype PyArray_DESCR(ndarray) wrong refcount semantics + int PyArray_FLAGS(ndarray) + npy_intp PyArray_ITEMSIZE(ndarray) + int PyArray_TYPE(ndarray arr) + + object PyArray_GETITEM(ndarray arr, void *itemptr) + int PyArray_SETITEM(ndarray arr, void *itemptr, object obj) + + bint PyTypeNum_ISBOOL(int) + bint PyTypeNum_ISUNSIGNED(int) + bint PyTypeNum_ISSIGNED(int) + bint PyTypeNum_ISINTEGER(int) + bint PyTypeNum_ISFLOAT(int) + bint PyTypeNum_ISNUMBER(int) + bint PyTypeNum_ISSTRING(int) + bint PyTypeNum_ISCOMPLEX(int) + bint PyTypeNum_ISPYTHON(int) + bint PyTypeNum_ISFLEXIBLE(int) + bint PyTypeNum_ISUSERDEF(int) + bint PyTypeNum_ISEXTENDED(int) + bint PyTypeNum_ISOBJECT(int) + + bint PyDataType_ISBOOL(dtype) + bint PyDataType_ISUNSIGNED(dtype) + bint PyDataType_ISSIGNED(dtype) + bint PyDataType_ISINTEGER(dtype) + bint PyDataType_ISFLOAT(dtype) + bint PyDataType_ISNUMBER(dtype) + bint PyDataType_ISSTRING(dtype) + bint PyDataType_ISCOMPLEX(dtype) + bint PyDataType_ISPYTHON(dtype) + bint PyDataType_ISFLEXIBLE(dtype) + bint PyDataType_ISUSERDEF(dtype) + bint PyDataType_ISEXTENDED(dtype) + bint PyDataType_ISOBJECT(dtype) + bint PyDataType_HASFIELDS(dtype) + + bint PyArray_ISBOOL(ndarray) + bint PyArray_ISUNSIGNED(ndarray) + bint PyArray_ISSIGNED(ndarray) + bint PyArray_ISINTEGER(ndarray) + bint 
PyArray_ISFLOAT(ndarray) + bint PyArray_ISNUMBER(ndarray) + bint PyArray_ISSTRING(ndarray) + bint PyArray_ISCOMPLEX(ndarray) + bint PyArray_ISPYTHON(ndarray) + bint PyArray_ISFLEXIBLE(ndarray) + bint PyArray_ISUSERDEF(ndarray) + bint PyArray_ISEXTENDED(ndarray) + bint PyArray_ISOBJECT(ndarray) + bint PyArray_HASFIELDS(ndarray) + + bint PyArray_ISVARIABLE(ndarray) + + bint PyArray_SAFEALIGNEDCOPY(ndarray) + bint PyArray_ISNBO(ndarray) + bint PyArray_IsNativeByteOrder(ndarray) + bint PyArray_ISNOTSWAPPED(ndarray) + bint PyArray_ISBYTESWAPPED(ndarray) + + bint PyArray_FLAGSWAP(ndarray, int) + + bint PyArray_ISCARRAY(ndarray) + bint PyArray_ISCARRAY_RO(ndarray) + bint PyArray_ISFARRAY(ndarray) + bint PyArray_ISFARRAY_RO(ndarray) + bint PyArray_ISBEHAVED(ndarray) + bint PyArray_ISBEHAVED_RO(ndarray) + + + bint PyDataType_ISNOTSWAPPED(dtype) + bint PyDataType_ISBYTESWAPPED(dtype) + + bint PyArray_DescrCheck(object) + + bint PyArray_Check(object) + bint PyArray_CheckExact(object) + + # Cannot be supported due to out arg: + # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&) + # bint PyArray_HasArrayInterface(op, out) + + + bint PyArray_IsZeroDim(object) + # Cannot be supported due to ## ## in macro: + # bint PyArray_IsScalar(object, verbatim work) + bint PyArray_CheckScalar(object) + bint PyArray_IsPythonNumber(object) + bint PyArray_IsPythonScalar(object) + bint PyArray_IsAnyScalar(object) + bint PyArray_CheckAnyScalar(object) + ndarray PyArray_GETCONTIGUOUS(ndarray) + bint PyArray_SAMESHAPE(ndarray, ndarray) + npy_intp PyArray_SIZE(ndarray) + npy_intp PyArray_NBYTES(ndarray) + + object PyArray_FROM_O(object) + object PyArray_FROM_OF(object m, int flags) + bint PyArray_FROM_OT(object m, int type) + bint PyArray_FROM_OTF(object m, int type, int flags) + object PyArray_FROMANY(object m, int type, int min, int max, int flags) + object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran) + object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran) + void PyArray_FILLWBYTE(object, int val) + npy_intp PyArray_REFCOUNT(object) + object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth) + unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2) + bint PyArray_EquivByteorders(int b1, int b2) + object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum) + object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data) + #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr) + object PyArray_ToScalar(void* data, ndarray arr) + + void* PyArray_GETPTR1(ndarray m, npy_intp i) + void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j) + void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k) + void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l) + + void PyArray_XDECREF_ERR(ndarray) + # Cannot be supported due to out arg + # void PyArray_DESCR_REPLACE(descr) + + + object PyArray_Copy(ndarray) + object PyArray_FromObject(object op, int type, int min_depth, int max_depth) + object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth) + object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth) + + object PyArray_Cast(ndarray mp, int type_num) + object PyArray_Take(ndarray ap, object items, int axis) + object PyArray_Put(ndarray ap, object items, object values) + + void PyArray_ITER_RESET(flatiter it) nogil + void PyArray_ITER_NEXT(flatiter it) nogil + void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil + void 
PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil + void* PyArray_ITER_DATA(flatiter it) nogil + bint PyArray_ITER_NOTDONE(flatiter it) nogil + + void PyArray_MultiIter_RESET(broadcast multi) nogil + void PyArray_MultiIter_NEXT(broadcast multi) nogil + void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil + void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil + void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil + void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil + bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil + + # Functions from __multiarray_api.h + + # Functions taking dtype and returning object/ndarray are disabled + # for now as they steal dtype references. I'm conservative and disable + # more than is probably needed until it can be checked further. + int PyArray_SetNumericOps (object) + object PyArray_GetNumericOps () + int PyArray_INCREF (ndarray) + int PyArray_XDECREF (ndarray) + void PyArray_SetStringFunction (object, int) + dtype PyArray_DescrFromType (int) + object PyArray_TypeObjectFromType (int) + char * PyArray_Zero (ndarray) + char * PyArray_One (ndarray) + #object PyArray_CastToType (ndarray, dtype, int) + int PyArray_CastTo (ndarray, ndarray) + int PyArray_CastAnyTo (ndarray, ndarray) + int PyArray_CanCastSafely (int, int) + npy_bool PyArray_CanCastTo (dtype, dtype) + int PyArray_ObjectType (object, int) + dtype PyArray_DescrFromObject (object, dtype) + #ndarray* PyArray_ConvertToCommonType (object, int *) + dtype PyArray_DescrFromScalar (object) + dtype PyArray_DescrFromTypeObject (object) + npy_intp PyArray_Size (object) + #object PyArray_Scalar (void *, dtype, object) + #object PyArray_FromScalar (object, dtype) + void PyArray_ScalarAsCtype (object, void *) + #int PyArray_CastScalarToCtype (object, void *, dtype) + #int PyArray_CastScalarDirect (object, dtype, void *, int) + object PyArray_ScalarFromObject (object) + #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int) + object PyArray_FromDims (int, int *, int) + #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *) + #object PyArray_FromAny (object, dtype, int, int, int, object) + object PyArray_EnsureArray (object) + object PyArray_EnsureAnyArray (object) + #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *) + #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *) + #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp) + #object PyArray_FromIter (object, dtype, npy_intp) + object PyArray_Return (ndarray) + #object PyArray_GetField (ndarray, dtype, int) + #int PyArray_SetField (ndarray, dtype, int, object) + object PyArray_Byteswap (ndarray, npy_bool) + object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER) + int PyArray_MoveInto (ndarray, ndarray) + int PyArray_CopyInto (ndarray, ndarray) + int PyArray_CopyAnyInto (ndarray, ndarray) + int PyArray_CopyObject (ndarray, object) + object PyArray_NewCopy (ndarray, NPY_ORDER) + object PyArray_ToList (ndarray) + object PyArray_ToString (ndarray, NPY_ORDER) + int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *) + int PyArray_Dump (object, object, int) + object PyArray_Dumps (object, int) + int PyArray_ValidType (int) + void PyArray_UpdateFlags (ndarray, int) + object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object) + #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object) + #dtype PyArray_DescrNew (dtype) + dtype PyArray_DescrNewFromType (int) + double PyArray_GetPriority 
(object, double) + object PyArray_IterNew (object) + object PyArray_MultiIterNew (int, ...) + + int PyArray_PyIntAsInt (object) + npy_intp PyArray_PyIntAsIntp (object) + int PyArray_Broadcast (broadcast) + void PyArray_FillObjectArray (ndarray, object) + int PyArray_FillWithScalar (ndarray, object) + npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *) + dtype PyArray_DescrNewByteorder (dtype, char) + object PyArray_IterAllButAxis (object, int *) + #object PyArray_CheckFromAny (object, dtype, int, int, int, object) + #object PyArray_FromArray (ndarray, dtype, int) + object PyArray_FromInterface (object) + object PyArray_FromStructInterface (object) + #object PyArray_FromArrayAttr (object, dtype, object) + #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*) + int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND) + object PyArray_NewFlagsObject (object) + npy_bool PyArray_CanCastScalar (type, type) + #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t) + int PyArray_RemoveSmallest (broadcast) + int PyArray_ElementStrides (object) + void PyArray_Item_INCREF (char *, dtype) + void PyArray_Item_XDECREF (char *, dtype) + object PyArray_FieldNames (object) + object PyArray_Transpose (ndarray, PyArray_Dims *) + object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE) + object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE) + object PyArray_PutMask (ndarray, object, object) + object PyArray_Repeat (ndarray, object, int) + object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE) + int PyArray_Sort (ndarray, int, NPY_SORTKIND) + object PyArray_ArgSort (ndarray, int, NPY_SORTKIND) + object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE) + object PyArray_ArgMax (ndarray, int, ndarray) + object PyArray_ArgMin (ndarray, int, ndarray) + object PyArray_Reshape (ndarray, object) + object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER) + object PyArray_Squeeze (ndarray) + #object PyArray_View (ndarray, dtype, type) + object PyArray_SwapAxes (ndarray, int, int) + object PyArray_Max (ndarray, int, ndarray) + object PyArray_Min (ndarray, int, ndarray) + object PyArray_Ptp (ndarray, int, ndarray) + object PyArray_Mean (ndarray, int, int, ndarray) + object PyArray_Trace (ndarray, int, int, int, int, ndarray) + object PyArray_Diagonal (ndarray, int, int, int) + object PyArray_Clip (ndarray, object, object, ndarray) + object PyArray_Conjugate (ndarray, ndarray) + object PyArray_Nonzero (ndarray) + object PyArray_Std (ndarray, int, int, ndarray, int) + object PyArray_Sum (ndarray, int, int, ndarray) + object PyArray_CumSum (ndarray, int, int, ndarray) + object PyArray_Prod (ndarray, int, int, ndarray) + object PyArray_CumProd (ndarray, int, int, ndarray) + object PyArray_All (ndarray, int, ndarray) + object PyArray_Any (ndarray, int, ndarray) + object PyArray_Compress (ndarray, object, int, ndarray) + object PyArray_Flatten (ndarray, NPY_ORDER) + object PyArray_Ravel (ndarray, NPY_ORDER) + npy_intp PyArray_MultiplyList (npy_intp *, int) + int PyArray_MultiplyIntList (int *, int) + void * PyArray_GetPtr (ndarray, npy_intp*) + int PyArray_CompareLists (npy_intp *, npy_intp *, int) + #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype) + #int PyArray_As1D (object*, char **, int *, int) + #int PyArray_As2D (object*, char ***, int *, int *, int) + int PyArray_Free (object, void *) + #int PyArray_Converter (object, object*) + int PyArray_IntpFromSequence (object, npy_intp *, int) + object PyArray_Concatenate (object, int) + 
object PyArray_InnerProduct (object, object) + object PyArray_MatrixProduct (object, object) + object PyArray_CopyAndTranspose (object) + object PyArray_Correlate (object, object, int) + int PyArray_TypestrConvert (int, int) + #int PyArray_DescrConverter (object, dtype*) + #int PyArray_DescrConverter2 (object, dtype*) + int PyArray_IntpConverter (object, PyArray_Dims *) + #int PyArray_BufferConverter (object, chunk) + int PyArray_AxisConverter (object, int *) + int PyArray_BoolConverter (object, npy_bool *) + int PyArray_ByteorderConverter (object, char *) + int PyArray_OrderConverter (object, NPY_ORDER *) + unsigned char PyArray_EquivTypes (dtype, dtype) + #object PyArray_Zeros (int, npy_intp *, dtype, int) + #object PyArray_Empty (int, npy_intp *, dtype, int) + object PyArray_Where (object, object, object) + object PyArray_Arange (double, double, double, int) + #object PyArray_ArangeObj (object, object, object, dtype) + int PyArray_SortkindConverter (object, NPY_SORTKIND *) + object PyArray_LexSort (object, int) + object PyArray_Round (ndarray, int, ndarray) + unsigned char PyArray_EquivTypenums (int, int) + int PyArray_RegisterDataType (dtype) + int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *) + int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND) + #void PyArray_InitArrFuncs (PyArray_ArrFuncs *) + object PyArray_IntTupleFromIntp (int, npy_intp *) + int PyArray_TypeNumFromName (char *) + int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *) + #int PyArray_OutputConverter (object, ndarray*) + object PyArray_BroadcastToShape (object, npy_intp *, int) + void _PyArray_SigintHandler (int) + void* _PyArray_GetSigintBuf () + #int PyArray_DescrAlignConverter (object, dtype*) + #int PyArray_DescrAlignConverter2 (object, dtype*) + int PyArray_SearchsideConverter (object, void *) + object PyArray_CheckAxis (ndarray, int *, int) + npy_intp PyArray_OverflowMultiplyList (npy_intp *, int) + int PyArray_CompareString (char *, char *, size_t) + + +# Typedefs that matches the runtime dtype objects in +# the numpy module. + +# The ones that are commented out needs an IFDEF function +# in Cython to enable them only on the right systems. 
+ +ctypedef npy_int8 int8_t +ctypedef npy_int16 int16_t +ctypedef npy_int32 int32_t +ctypedef npy_int64 int64_t +#ctypedef npy_int96 int96_t +#ctypedef npy_int128 int128_t + +ctypedef npy_uint8 uint8_t +ctypedef npy_uint16 uint16_t +ctypedef npy_uint32 uint32_t +ctypedef npy_uint64 uint64_t +#ctypedef npy_uint96 uint96_t +#ctypedef npy_uint128 uint128_t + +ctypedef npy_float32 float32_t +ctypedef npy_float64 float64_t +#ctypedef npy_float80 float80_t +#ctypedef npy_float128 float128_t + +ctypedef float complex complex64_t +ctypedef double complex complex128_t + +# The int types are mapped a bit surprising -- +# numpy.int corresponds to 'l' and numpy.long to 'q' +ctypedef npy_long int_t +ctypedef npy_longlong long_t +ctypedef npy_longlong longlong_t + +ctypedef npy_ulong uint_t +ctypedef npy_ulonglong ulong_t +ctypedef npy_ulonglong ulonglong_t + +ctypedef npy_intp intp_t +ctypedef npy_uintp uintp_t + +ctypedef npy_double float_t +ctypedef npy_double double_t +ctypedef npy_longdouble longdouble_t + +ctypedef npy_cfloat cfloat_t +ctypedef npy_cdouble cdouble_t +ctypedef npy_clongdouble clongdouble_t + +ctypedef npy_cdouble complex_t + +cdef inline object PyArray_MultiIterNew1(a): + return PyArray_MultiIterNew(1, a) + +cdef inline object PyArray_MultiIterNew2(a, b): + return PyArray_MultiIterNew(2, a, b) + +cdef inline object PyArray_MultiIterNew3(a, b, c): + return PyArray_MultiIterNew(3, a, b, c) + +cdef inline object PyArray_MultiIterNew4(a, b, c, d): + return PyArray_MultiIterNew(4, a, b, c, d) + +cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + return PyArray_MultiIterNew(5, a, b, c, d, e) + +cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: + # Recursive utility function used in __getbuffer__ to get format + # string. The new location in the format string is returned. + + cdef dtype child + cdef int delta_offset + cdef tuple i + cdef int endian_detector = 1 + cdef bint little_endian = ((&endian_detector)[0] != 0) + cdef tuple fields + + for childname in descr.names: + fields = descr.fields[childname] + child, new_offset = fields + + if (end - f) - (new_offset - offset[0]) < 15: + raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + + if ((child.byteorder == '>' and little_endian) or + (child.byteorder == '<' and not little_endian)): + raise ValueError(u"Non-native byte order not supported") + # One could encode it in the format string and have Cython + # complain instead, BUT: < and > in format strings also imply + # standardized sizes for datatypes, and we rely on native in + # order to avoid reencoding data types based on their size. + # + # A proper PEP 3118 exporter for other clients than Cython + # must deal properly with this! 
+ + # Output padding bytes + while offset[0] < new_offset: + f[0] = 120 # "x"; pad byte + f += 1 + offset[0] += 1 + + offset[0] += child.itemsize + + if not PyDataType_HASFIELDS(child): + t = child.type_num + if end - f < 5: + raise RuntimeError(u"Format string allocated too short.") + + # Until ticket #99 is fixed, use integers to avoid warnings + if t == NPY_BYTE: f[0] = 98 #"b" + elif t == NPY_UBYTE: f[0] = 66 #"B" + elif t == NPY_SHORT: f[0] = 104 #"h" + elif t == NPY_USHORT: f[0] = 72 #"H" + elif t == NPY_INT: f[0] = 105 #"i" + elif t == NPY_UINT: f[0] = 73 #"I" + elif t == NPY_LONG: f[0] = 108 #"l" + elif t == NPY_ULONG: f[0] = 76 #"L" + elif t == NPY_LONGLONG: f[0] = 113 #"q" + elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + elif t == NPY_FLOAT: f[0] = 102 #"f" + elif t == NPY_DOUBLE: f[0] = 100 #"d" + elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + elif t == NPY_OBJECT: f[0] = 79 #"O" + else: + raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + f += 1 + else: + # Cython ignores struct boundary information ("T{...}"), + # so don't output it + f = _util_dtypestring(child, f, end, offset) + return f + + +# +# ufunc API +# + +cdef extern from "numpy/ufuncobject.h": + + ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *) + + ctypedef extern class numpy.ufunc [object PyUFuncObject]: + cdef: + int nin, nout, nargs + int identity + PyUFuncGenericFunction *functions + void **data + int ntypes + int check_return + char *name, *types + char *doc + void *ptr + PyObject *obj + PyObject *userloops + + cdef enum: + PyUFunc_Zero + PyUFunc_One + PyUFunc_None + UFUNC_ERR_IGNORE + UFUNC_ERR_WARN + UFUNC_ERR_RAISE + UFUNC_ERR_CALL + UFUNC_ERR_PRINT + UFUNC_ERR_LOG + UFUNC_MASK_DIVIDEBYZERO + UFUNC_MASK_OVERFLOW + UFUNC_MASK_UNDERFLOW + UFUNC_MASK_INVALID + UFUNC_SHIFT_DIVIDEBYZERO + UFUNC_SHIFT_OVERFLOW + UFUNC_SHIFT_UNDERFLOW + UFUNC_SHIFT_INVALID + UFUNC_FPE_DIVIDEBYZERO + UFUNC_FPE_OVERFLOW + UFUNC_FPE_UNDERFLOW + UFUNC_FPE_INVALID + UFUNC_ERR_DEFAULT + UFUNC_ERR_DEFAULT2 + + object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *, + void **, char *, int, int, int, int, char *, char *, int) + int PyUFunc_RegisterLoopForType(ufunc, int, + PyUFuncGenericFunction, int *, void *) + int PyUFunc_GenericFunction \ + (ufunc, PyObject *, PyObject *, PyArrayObject **) + void PyUFunc_f_f_As_d_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_d_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_f_f \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_g_g \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_F_F_As_D_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_F_F \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_D_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_G_G \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_O_O \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_ff_f_As_dd_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_ff_f \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_dd_d \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_gg_g \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_FF_F_As_DD_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_DD_D \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_FF_F \ + (char **, 
npy_intp *, npy_intp *, void *) + void PyUFunc_GG_G \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_OO_O \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_O_O_method \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_OO_O_method \ + (char **, npy_intp *, npy_intp *, void *) + void PyUFunc_On_Om \ + (char **, npy_intp *, npy_intp *, void *) + int PyUFunc_GetPyValues \ + (char *, int *, int *, PyObject **) + int PyUFunc_checkfperr \ + (int, PyObject *, int *) + void PyUFunc_clearfperr() + int PyUFunc_getfperr() + int PyUFunc_handlefperr \ + (int, PyObject *, int, int *) + int PyUFunc_ReplaceLoopBySignature \ + (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *) + object PyUFunc_FromFuncAndDataAndSignature \ + (PyUFuncGenericFunction *, void **, char *, int, int, int, + int, char *, char *, int, char *) + + void import_ufunc() + + +cdef inline void set_array_base(ndarray arr, object base): + cdef PyObject* baseptr + if base is None: + baseptr = NULL + else: + Py_INCREF(base) # important to do this before decref below! + baseptr = base + Py_XDECREF(arr.base) + arr.base = baseptr + +cdef inline object get_array_base(ndarray arr): + if arr.base is NULL: + return None + else: + return arr.base diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index 1fb2be9a598d5..5a22fd7adde74 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -8,3 +8,4 @@ from pandas.tseries.offsets import * from pandas.tseries.period import PeriodIndex, period_range, pnow from pandas.tseries.resample import TimeGrouper +import pandas.tseries.offsets as offsets From d2b947b10186d90055f2d62ff709b3b449aabf56 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 14:25:12 -0400 Subject: [PATCH 054/114] BLD: fix datetime.pxd --- pandas/src/datetime.pxd | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index 6ae001c2f0010..e71139f0ab5ab 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -49,7 +49,6 @@ cdef extern from "numpy/ndarrayobject.h": NPY_SAME_KIND_CASTING NPY_UNSAFE_CASTING -cdef extern from "numpy/ndarraytypes cdef extern from "numpy_helper.h": npy_datetime unbox_datetime64_scalar(object o) From 67a98ff5010e321c0afd19e60f3af2e967a2b075 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 16:01:56 -0400 Subject: [PATCH 055/114] ENH: can pass multiple columns to GroupBy.__getitem__, close #383 --- RELEASE.rst | 2 + pandas/core/groupby.py | 71 ++++++++++++++++++++---------------- pandas/tests/test_groupby.py | 16 ++++++++ 3 files changed, 58 insertions(+), 31 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 5bbcb54601d30..54a3d0c0d7d56 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -58,6 +58,8 @@ pandas 0.8.0 by significant margin (#836) - Handle more dtypes when passed MaskedArrays in DataFrame constructor (#406) - Improved performance of join operations on integer keys (#682) + - Can pass multiple columns to GroupBy object, e.g. 
grouped[[col1, col2]] to + only aggregate a subset of the value columns (#383) **API Changes** diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3d8f70892aa78..471ebc76c9982 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -113,9 +113,9 @@ class GroupBy(object): """ def __init__(self, obj, keys=None, axis=0, level=None, - grouper=None, exclusions=None, column=None, as_index=True, + grouper=None, exclusions=None, selection=None, as_index=True, sort=True, group_keys=True): - self._column = column + self._selection = selection if isinstance(obj, NDFrame): obj._consolidate_inplace() @@ -159,10 +159,16 @@ def indices(self): @property def name(self): - if self._column is None: + if self._selection is None: return None # 'result' else: - return self._column + return self._selection + + @property + def _selection_list(self): + if not isinstance(self._selection, (list, tuple, np.ndarray)): + return [self._selection] + return self._selection @property def _obj_with_exclusions(self): @@ -1291,10 +1297,10 @@ class NDFrameGroupBy(GroupBy): def _iterate_slices(self): if self.axis == 0: # kludge - if self._column is None: + if self._selection is None: slice_axis = self.obj.columns else: - slice_axis = [self._column] + slice_axis = self._selection_list slicer = lambda x: self.obj[x] else: slice_axis = self.obj.index @@ -1358,8 +1364,8 @@ def _post_process_cython_aggregate(self, obj): @cache_readonly def _obj_with_exclusions(self): - if self._column is not None: - return self.obj.reindex(columns=[self._column]) + if self._selection is not None: + return self.obj.reindex(columns=self._selection_list) if len(self.exclusions) > 0: return self.obj.drop(self.exclusions, axis=1) @@ -1391,15 +1397,18 @@ def aggregate(self, arg, *args, **kwargs): obj = self._obj_with_exclusions - if self._column is not None: - series_obj = obj[self._column] + if self._selection is not None: + subset = obj[self._selection] + if isinstance(subset, DataFrame): + raise NotImplementedError + for fname, func in arg.iteritems(): - colg = SeriesGroupBy(series_obj, column=self._column, + colg = SeriesGroupBy(subset, selection=self._selection, grouper=self.grouper) result[fname] = colg.aggregate(func) else: for col, func in arg.iteritems(): - colg = SeriesGroupBy(obj[col], column=col, + colg = SeriesGroupBy(obj[col], selection=col, grouper=self.grouper) result[col] = colg.aggregate(func) @@ -1443,7 +1452,7 @@ def _aggregate_multiple_funcs(self, arg): keys = [] for col in obj: try: - colg = SeriesGroupBy(obj[col], column=col, + colg = SeriesGroupBy(obj[col], selection=col, grouper=self.grouper) results.append(colg.aggregate(arg)) keys.append(col) @@ -1490,7 +1499,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): cannot_agg = [] for item in obj: try: - colg = SeriesGroupBy(obj[item], column=item, + colg = SeriesGroupBy(obj[item], selection=item, grouper=self.grouper) result[item] = colg.aggregate(func, *args, **kwargs) except (ValueError, TypeError): @@ -1620,22 +1629,21 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 def __getitem__(self, key): - if self._column is not None: - raise Exception('Column %s already selected' % self._column) - - if key not in self.obj: # pragma: no cover - raise KeyError(str(key)) + if self._selection is not None: + raise Exception('Column(s) %s already selected' % self._selection) - # kind of a kludge - if self.as_index: - return SeriesGroupBy(self.obj[key], column=key, - grouper=self.grouper, - exclusions=self.exclusions) - else: - return 
DataFrameGroupBy(self.obj, self.grouper, column=key, + if isinstance(key, (list, tuple)) or not self.as_index: + return DataFrameGroupBy(self.obj, self.grouper, selection=key, grouper=self.grouper, exclusions=self.exclusions, as_index=self.as_index) + else: + if key not in self.obj: # pragma: no cover + raise KeyError(str(key)) + # kind of a kludge + return SeriesGroupBy(self.obj[key], selection=key, + grouper=self.grouper, + exclusions=self.exclusions) def _wrap_generic_output(self, result, obj): result_index = self.grouper.levels[0] @@ -1733,14 +1741,15 @@ class PanelGroupBy(NDFrameGroupBy): def _iterate_slices(self): if self.axis == 0: # kludge - if self._column is None: + if self._selection is None: slice_axis = self.obj.items else: - slice_axis = [self._column] + slice_axis = self._selection_list slicer = lambda x: self.obj[x] - elif foo: - slice_axis = self.obj.index - slicer = lambda x: self.obj.xs(x, axis=self.axis) + else: + raise NotImplementedError + # slice_axis = self.obj.index + # slicer = lambda x: self.obj.xs(x, axis=self.axis) for val in slice_axis: if val in self.exclusions: diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 240c86bf9df4a..d42326f50a2a8 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1713,6 +1713,22 @@ def g(group): self.assert_(isinstance(result, Series)) assert_series_equal(result, expected) + def test_getitem_list_of_columns(self): + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8), + 'E': np.random.randn(8)}) + + result = df.groupby('A')[['C', 'D']].mean() + result2 = df.groupby('A')['C', 'D'].mean() + expected = df.ix[:, ['A', 'C', 'D']].groupby('A').mean() + + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = map(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) From 2e9de0e1fa1ecf0762a184075dca22aee1415172 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 16:28:08 -0400 Subject: [PATCH 056/114] ENH: accept list of tuples, preserving function order in SeriesGroupBy.aggregate --- RELEASE.rst | 2 ++ doc/source/whatsnew.rst | 2 ++ doc/source/whatsnew/v0.8.0.txt | 4 ++++ pandas/core/groupby.py | 16 ++++++++++++---- pandas/tests/test_groupby.py | 9 +++++++++ 5 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 doc/source/whatsnew/v0.8.0.txt diff --git a/RELEASE.rst b/RELEASE.rst index 54a3d0c0d7d56..8f619e7ddabcc 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -44,6 +44,8 @@ pandas 0.8.0 - Dates can be split across multiple columns (#1227, #1186) - Add experimental support for converting pandas DataFrame to R data.frame via rpy2 (#350, #1212) + - Can pass list of (name, function) to GroupBy.aggregate to get aggregates in + a particular order (#610) **Improvements to existing features** diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index 85c47d46beb74..b930bdbbde1b1 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -16,6 +16,8 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.8.0.txt + .. include:: whatsnew/v0.7.3.txt .. 
include:: whatsnew/v0.7.2.txt diff --git a/doc/source/whatsnew/v0.8.0.txt b/doc/source/whatsnew/v0.8.0.txt new file mode 100644 index 0000000000000..98f90d5254ed1 --- /dev/null +++ b/doc/source/whatsnew/v0.8.0.txt @@ -0,0 +1,4 @@ +.. _whatsnew_080: + +v.0.8.0 (TDB May, 2012) +----------------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 471ebc76c9982..36b80af0fba5f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1207,15 +1207,23 @@ def aggregate(self, func_or_funcs, *args, **kwargs): return ret def _aggregate_multiple_funcs(self, arg): - if not isinstance(arg, dict): - arg = dict((func.__name__, func) for func in arg) + if isinstance(arg, dict): + columns = arg.keys() + arg = arg.items() + elif isinstance(arg[0], (tuple, list)): + # indicated column order + columns = zip(*arg)[0] + else: + # list of functions + columns = [func.__name__ for func in arg] + arg = zip(columns, arg) results = {} - for name, func in arg.iteritems(): + for name, func in arg: results[name] = self.aggregate(func) - return DataFrame(results) + return DataFrame(results, columns=columns) def _wrap_aggregated_output(self, output, names=None): # sort of a kludge diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index d42326f50a2a8..a4fbe444d5b16 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1729,6 +1729,15 @@ def test_getitem_list_of_columns(self): assert_frame_equal(result, expected) assert_frame_equal(result2, expected) + def test_agg_multiple_functions_maintain_order(self): + + funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] + result = self.df.groupby('A')['C'].agg(funcs) + exp_cols = ['mean', 'max', 'min'] + + self.assert_(np.array_equal(result.columns, exp_cols)) + + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = map(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) From 92d050bafb7eb1af257dde90905e49265f13863d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 17:10:24 -0400 Subject: [PATCH 057/114] ENH: more flexible multiple function application in DataFrameGroupBy, close #642 --- pandas/core/groupby.py | 28 ++++++++++++++++++++++------ pandas/tests/test_frame.py | 4 ++-- pandas/tests/test_groupby.py | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 36b80af0fba5f..e5ce4ffdf77d3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1212,7 +1212,7 @@ def _aggregate_multiple_funcs(self, arg): arg = arg.items() elif isinstance(arg[0], (tuple, list)): # indicated column order - columns = zip(*arg)[0] + columns = list(zip(*arg))[0] else: # list of functions columns = [func.__name__ for func in arg] @@ -1405,22 +1405,38 @@ def aggregate(self, arg, *args, **kwargs): obj = self._obj_with_exclusions + if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): + new_arg = {} + for k, v in arg.iteritems(): + if not isinstance(v, (tuple, list, dict)): + new_arg[k] = [v] + else: + new_arg[k] = v + arg = new_arg + + keys = [] if self._selection is not None: subset = obj[self._selection] if isinstance(subset, DataFrame): raise NotImplementedError - for fname, func in arg.iteritems(): + for fname, agg_how in arg.iteritems(): colg = SeriesGroupBy(subset, selection=self._selection, grouper=self.grouper) - result[fname] = colg.aggregate(func) + result[fname] = colg.aggregate(agg_how) + keys.append(fname) else: - for col, func in 
arg.iteritems(): + for col, agg_how in arg.iteritems(): colg = SeriesGroupBy(obj[col], selection=col, grouper=self.grouper) - result[col] = colg.aggregate(func) + result[col] = colg.aggregate(agg_how) + keys.append(col) - result = DataFrame(result) + if isinstance(result.values()[0], DataFrame): + from pandas.tools.merge import concat + result = concat([result[k] for k in keys], keys=keys, axis=1) + else: + result = DataFrame(result) elif isinstance(arg, list): return self._aggregate_multiple_funcs(arg) else: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b23ba46b44833..ea189be079420 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1635,8 +1635,8 @@ def test_constructor_maskedarray_nonfloat(self): # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], - index=[1, 2], dtype=int) - self.assert_(frame.values.dtype == int) + index=[1, 2], dtype=np.int64) + self.assert_(frame.values.dtype == np.int64) # Check non-masked values mat2 = ma.copy(mat) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a4fbe444d5b16..524738e097330 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1730,13 +1730,47 @@ def test_getitem_list_of_columns(self): assert_frame_equal(result2, expected) def test_agg_multiple_functions_maintain_order(self): - + # GH #610 funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] result = self.df.groupby('A')['C'].agg(funcs) exp_cols = ['mean', 'max', 'min'] self.assert_(np.array_equal(result.columns, exp_cols)) + def test_more_flexible_frame_multi_function(self): + from pandas import concat + + grouped = self.df.groupby('A') + + exmean = grouped.agg({'C' : np.mean, 'D' : np.mean}) + exstd = grouped.agg({'C' : np.std, 'D' : np.std}) + + expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sortlevel(0, axis=1) + + result = grouped.aggregate({'C' : [np.mean, np.std], + 'D' : [np.mean, np.std]}) + + assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate({'C' : np.mean, + 'D' : [np.mean, np.std]}) + expected = grouped.aggregate({'C' : [np.mean], + 'D' : [np.mean, np.std]}) + assert_frame_equal(result, expected) + + + def foo(x): return np.mean(x) + def bar(x): return np.std(x, ddof=1) + result = grouped.aggregate({'C' : np.mean, + 'D' : {'foo': np.mean, + 'bar': np.std}}) + expected = grouped.aggregate({'C' : [np.mean], + 'D' : [foo, bar]}) + assert_frame_equal(result, expected) + + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = map(tuple, df[keys].values) From b07f0971bcecc54978031b581a929140b75c0614 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 17:18:32 -0400 Subject: [PATCH 058/114] DOC: release notes --- RELEASE.rst | 2 ++ doc/source/conf.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index 8f619e7ddabcc..24ab824914b98 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -46,6 +46,8 @@ pandas 0.8.0 via rpy2 (#350, #1212) - Can pass list of (name, function) to GroupBy.aggregate to get aggregates in a particular order (#610) + - Can pass dicts with lists of functions or dicts to GroupBy aggregate to do + much more flexible multiple function aggregation (#642) **Improvements to existing features** diff --git a/doc/source/conf.py b/doc/source/conf.py index 970700ff4d275..f2fc6511143d8 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -209,7 +209,7 @@ latex_documents = [ ('index', 
'pandas.tex', u'pandas: powerful Python data analysis toolkit', - u'Wes McKinney', 'manual'), + u'Wes McKinney\n& PyData Development Team', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of From 48a073a4c3379f68622f73f38a757513438b323d Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Fri, 20 Apr 2012 23:38:04 +0200 Subject: [PATCH 059/114] ENH: treat complex number in maybe_convert_objects --- pandas/src/inference.pyx | 28 +++++++++++++++++++++++----- pandas/src/numpy_helper.h | 4 ++++ pandas/src/util.pxd | 1 + pandas/tests/test_tseries.py | 7 +++++++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 3b23de6eabf8b..20b31707a7be9 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -270,9 +270,11 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, cdef: Py_ssize_t i, n ndarray[float64_t] floats + ndarray[complex64_t] complexes ndarray[int64_t] ints ndarray[uint8_t] bools bint seen_float = 0 + bint seen_complex = 0 bint seen_int = 0 bint seen_bool = 0 bint seen_object = 0 @@ -283,6 +285,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, n = len(objects) floats = np.empty(n, dtype='f8') + complexes = np.empty(n, dtype='c8') ints = np.empty(n, dtype='i8') bools = np.empty(n, dtype=np.uint8) @@ -294,7 +297,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if val is None: seen_null = 1 - floats[i] = fnan + floats[i] = complexes[i] = fnan elif util.is_bool_object(val): seen_bool = 1 bools[i] = val @@ -305,15 +308,20 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif util.is_integer_object(val): seen_int = 1 floats[i] = val + complexes[i] = val if not seen_null: ints[i] = val elif util.is_float_object(val): - floats[i] = val + floats[i] = complexes[i] = val seen_float = 1 + elif util.is_complex_object(val): + complexes[i] = val + seen_complex = 1 elif try_float and not util.is_string_object(val): # this will convert Decimal objects try: floats[i] = float(val) + complexes[i] = complex(val) seen_float = 1 except Exception: seen_object = 1 @@ -323,14 +331,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not safe: if seen_null: if (seen_float or seen_int) and not seen_object: - return floats + if seen_complex: + return complexes + else: + return floats else: return objects else: if seen_object: return objects elif not seen_bool: - if seen_float: + if seen_complex: + return complexes + elif seen_float: return floats elif seen_int: return ints @@ -343,7 +356,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, # don't cast int to float, etc. 
if seen_null: if (seen_float or seen_int) and not seen_object: - return floats + if seen_complex: + return complexes + else: + return floats else: return objects else: @@ -352,6 +368,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif not seen_bool: if seen_int and seen_float: return objects + elif seen_complex: + return complexes elif seen_float: return floats elif seen_int: diff --git a/pandas/src/numpy_helper.h b/pandas/src/numpy_helper.h index b2fecfdd7ed35..b63835119fb35 100644 --- a/pandas/src/numpy_helper.h +++ b/pandas/src/numpy_helper.h @@ -64,6 +64,10 @@ PANDAS_INLINE int is_float_object(PyObject* obj) { return (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating)); } +PANDAS_INLINE int +is_complex_object(PyObject* obj) { + return (PyComplex_Check(obj) || PyArray_IsScalar(obj, ComplexFloating)); +} PANDAS_INLINE int is_bool_object(PyObject* obj) { diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index 22d7c7896902c..3ebd72cc83ee4 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -4,6 +4,7 @@ cimport numpy as cnp cdef extern from "numpy_helper.h": inline int is_integer_object(object) inline int is_float_object(object) + inline int is_complex_object(object) inline int is_bool_object(object) inline int is_string_object(object) inline int is_datetime64_object(object) diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index d9ddf63fea29c..a25dc60053a18 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -208,6 +208,13 @@ def test_convert_objects_ints(): result = lib.maybe_convert_objects(arr) assert(issubclass(result.dtype.type, np.integer)) +def test_convert_objects_complex_number(): + for dtype in np.sctypes['complex']: + arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') + assert(arr[0].dtype == np.dtype(dtype)) + result = lib.maybe_convert_objects(arr) + assert(issubclass(result.dtype.type, np.complexfloating)) + def test_rank(): from pandas.compat.scipy import rankdata From a3e538fb5f14b7674fb63fec8a6af0dc8924a086 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 00:00:13 +0200 Subject: [PATCH 060/114] ENH: treat complex number in maybe_convert_objects --- pandas/src/inference.pyx | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 20b31707a7be9..6c88d293106ab 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -223,31 +223,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): cdef: Py_ssize_t i, n ndarray[float64_t] floats + ndarray[complex64_t] complexes ndarray[int64_t] ints bint seen_float = 0 + bint seen_complex = 0 object val float64_t fval n = len(values) floats = np.empty(n, dtype='f8') + complexes = np.empty(n, dtype='c8') ints = np.empty(n, dtype='i8') for i from 0 <= i < n: val = values[i] if util.is_float_object(val): - floats[i] = val + floats[i] = complexes[i] = val seen_float = 1 elif val in na_values: - floats[i] = nan + floats[i] = complexes[i] = nan seen_float = 1 elif val is None: - floats[i] = nan + floats[i] = complexes[i] = nan seen_float = 1 elif len(val) == 0: - floats[i] = nan + floats[i] = complexes[i] = nan seen_float = 1 + elif util.is_complex_object(val): + complexes[i] = val + seen_complex = 1 else: fval = util.floatify(val) floats[i] = fval @@ -257,7 +263,9 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): else: ints[i] = fval - if seen_float: + if seen_complex: + return 
complexes + elif seen_float: return floats else: return ints From ca6558cad129df936d2e14ef56e928dbaed8ccc9 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 00:01:26 +0200 Subject: [PATCH 061/114] TST: Add complex number in test_constructor_scalar_inference --- pandas/tests/test_frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ea189be079420..ff25c7cde01a8 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1682,12 +1682,13 @@ def test_constructor_corner(self): def test_constructor_scalar_inference(self): data = {'int' : 1, 'bool' : True, - 'float' : 3., 'object' : 'foo'} + 'float' : 3., 'complex': 4j, 'object' : 'foo'} df = DataFrame(data, index=np.arange(10)) self.assert_(df['int'].dtype == np.int64) self.assert_(df['bool'].dtype == np.bool_) self.assert_(df['float'].dtype == np.float64) + self.assert_(df['complex'].dtype == np.complex64) self.assert_(df['object'].dtype == np.object_) def test_constructor_DataFrame(self): From 3f3b900e5984f26f28f90adc12a384a2b0ad4fa1 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 00:26:48 +0200 Subject: [PATCH 062/114] ENH: treat complex number in internals.form_blocks --- pandas/core/internals.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f74c38ac5f450..af46af5ca8f91 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -987,6 +987,7 @@ def form_blocks(data, axes): # put "leftover" items in float bucket, where else? # generalize? float_dict = {} + complex_dict = {} int_dict = {} bool_dict = {} object_dict = {} @@ -994,6 +995,8 @@ def form_blocks(data, axes): for k, v in data.iteritems(): if issubclass(v.dtype.type, np.floating): float_dict[k] = v + elif issubclass(v.dtype.type, np.complexfloating): + complex_dict[k] = v elif issubclass(v.dtype.type, np.datetime64): datetime_dict[k] = v elif issubclass(v.dtype.type, np.integer): @@ -1008,6 +1011,10 @@ def form_blocks(data, axes): float_block = _simple_blockify(float_dict, items, np.float64) blocks.append(float_block) + if len(complex_dict): + complex_block = _simple_blockify(complex_dict, items, np.complex64) + blocks.append(complex_block) + if len(int_dict): int_block = _simple_blockify(int_dict, items, np.int64) blocks.append(int_block) From dc43a1e1000f28a178165fa2a5633ec1f6e449c0 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 04:49:47 +0200 Subject: [PATCH 063/114] ENH: add internals.ComplexBlock --- pandas/core/internals.py | 22 +++++++++++++++++----- pandas/src/tseries.pyx | 2 +- pandas/tests/test_internals.py | 16 ++++++++++++---- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index af46af5ca8f91..198c57ba2b5d4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -250,6 +250,12 @@ def should_store(self, value): # unnecessarily return issubclass(value.dtype.type, np.floating) +class ComplexBlock(Block): + _can_hold_na = True + + def should_store(self, value): + return issubclass(value.dtype.type, np.complexfloating) + class IntBlock(Block): _can_hold_na = False @@ -267,7 +273,8 @@ class ObjectBlock(Block): def should_store(self, value): return not issubclass(value.dtype.type, - (np.integer, np.floating, np.bool_)) + (np.integer, np.floating, np.complexfloating, + np.bool_)) class DatetimeBlock(IntBlock): _can_hold_na = True @@ -279,6 +286,8 @@ 
def make_block(values, items, ref_items, do_integrity_check=False): if issubclass(vtype, np.floating): klass = FloatBlock + elif issubclass(vtype, np.complexfloating): + klass = ComplexBlock elif issubclass(vtype, np.datetime64): klass = DatetimeBlock elif issubclass(vtype, np.integer): @@ -423,7 +432,7 @@ def is_consolidated(self): def get_numeric_data(self, copy=False): num_blocks = [b for b in self.blocks - if isinstance(b, (IntBlock, FloatBlock))] + if isinstance(b, (IntBlock, FloatBlock, ComplexBlock))] indexer = np.sort(np.concatenate([b.ref_locs for b in num_blocks])) new_items = self.items.take(indexer) @@ -1103,8 +1112,9 @@ def _interleaved_dtype(blocks): have_bool = counts[BoolBlock] > 0 have_object = counts[ObjectBlock] > 0 have_float = counts[FloatBlock] > 0 + have_complex = counts[ComplexBlock] > 0 have_dt64 = counts[DatetimeBlock] > 0 - have_numeric = have_float or have_int + have_numeric = have_float or have_complex or have_int if have_object: return np.object_ @@ -1112,10 +1122,12 @@ def _interleaved_dtype(blocks): return np.object_ elif have_bool: return np.bool_ - elif have_int and not have_float: + elif have_int and not have_float and not have_complex: return np.int64 - elif have_dt64 and not have_float: + elif have_dt64 and not have_float and not have_complex: return np.datetime64 + elif have_complex: + return np.complex64 else: return np.float64 diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index b8685a051eba3..55c0b3c5a92c7 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -156,7 +156,7 @@ cdef double INF = np.inf cdef double NEGINF = -INF cpdef checknull(object val): - if util.is_float_object(val): + if util.is_float_object(val) or util.is_complex_object(val): return val != val or val == INF or val == NEGINF elif util.is_datetime64_object(val): return val.view('i8') == NaT diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 84a0589443249..976b4439fffdf 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -19,13 +19,17 @@ def assert_block_equal(left, right): def get_float_mat(n, k): return np.repeat(np.atleast_2d(np.arange(k, dtype=float)), n, axis=0) -TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] +TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] N = 10 def get_float_ex(cols=['a', 'c', 'e']): floats = get_float_mat(N, 3).T return make_block(floats, cols, TEST_COLS) +def get_complex_ex(cols=['h']): + complexes = (get_float_mat(N, 1).T * 1j).astype(np.complex64) + return make_block(complexes, cols, TEST_COLS) + def get_obj_ex(cols=['b', 'd']): mat = np.empty((N, 2), dtype=object) mat[:, 0] = 'foo' @@ -44,6 +48,7 @@ class TestBlock(unittest.TestCase): def setUp(self): self.fblock = get_float_ex() + self.cblock = get_complex_ex() self.oblock = get_obj_ex() self.bool_block = get_bool_ex() self.int_block = get_int_ex() @@ -60,6 +65,7 @@ def _check(blk): assert_block_equal(blk, unpickled) _check(self.fblock) + _check(self.cblock) _check(self.oblock) _check(self.bool_block) @@ -175,7 +181,8 @@ def setUp(self): self.blocks = [get_float_ex(), get_obj_ex(), get_bool_ex(), - get_int_ex()] + get_int_ex(), + get_complex_ex()] self.mgr = BlockManager.from_blocks(self.blocks, np.arange(N)) def test_constructor_corner(self): @@ -198,13 +205,13 @@ def test_is_indexed_like(self): self.assert_(not self.mgr._is_indexed_like(mgr2)) def test_block_id_vector_item_dtypes(self): - expected = [0, 1, 0, 1, 0, 2, 3] + expected = [0, 1, 0, 1, 0, 2, 3, 4] result = self.mgr.block_id_vector 
assert_almost_equal(expected, result) result = self.mgr.item_dtypes expected = ['float64', 'object', 'float64', 'object', 'float64', - 'bool', 'int64'] + 'bool', 'int64', 'complex64'] self.assert_(np.array_equal(result, expected)) def test_union_block_items(self): @@ -298,6 +305,7 @@ def test_consolidate_ordering_issues(self): self.mgr.set('d', randn(N)) self.mgr.set('b', randn(N)) self.mgr.set('g', randn(N)) + self.mgr.set('h', randn(N)) cons = self.mgr.consolidate() self.assertEquals(cons.nblocks, 1) From c280d2237c148224a0e358e21d4a761f11e68272 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 04:57:53 +0200 Subject: [PATCH 064/114] BUG: fix max recursion error in test_reindex_items It looks like sorting by dtype itself does not work. To see that, try this snippet: >>> from numpy import dtype >>> sorted([dtype('bool'), dtype('float64'), dtype('complex64'), ... dtype('float64'), dtype('object')]) [dtype('bool'), dtype('float64'), dtype('complex64'), dtype('float64'), dtype('object')] --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 198c57ba2b5d4..77969ffa26f17 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1135,7 +1135,7 @@ def _consolidate(blocks, items): """ Merge blocks having same dtype """ - get_dtype = lambda x: x.dtype + get_dtype = lambda x: x.dtype.name # sort by dtype grouper = itertools.groupby(sorted(blocks, key=get_dtype), From a7698da0e61df03bd017da54876e097d50a9cb0a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 20:05:57 -0400 Subject: [PATCH 065/114] BLD: fix platform int issues --- pandas/core/groupby.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index e5ce4ffdf77d3..6d5ae2a573482 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2012,11 +2012,12 @@ def _get_indices_dict(label_list, keys): sorter, _ = lib.groupsort_indexer(com._ensure_int64(group_index), np.prod(shape)) - sorted_labels = [lab.take(sorter) for lab in label_list] - group_index = group_index.take(sorter) - index = np.arange(len(group_index)).take(sorter) + sorter_int = com._ensure_platform_int(sorter) - return lib.indices_fast(index, group_index, keys, sorted_labels) + sorted_labels = [lab.take(sorter_int) for lab in label_list] + group_index = group_index.take(sorter_int) + + return lib.indices_fast(sorter, group_index, keys, sorted_labels) #---------------------------------------------------------------------- # sorting levels...cleverly? 
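
Taken together, the GroupBy patches above (#383, #610, #642) extend column selection and multiple-function aggregation. A minimal usage sketch against a build with this series applied, reusing the column names from the tests above ('A' as the group key, 'C' and 'D' as value columns):

    import numpy as np
    from pandas import DataFrame

    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar'],
                    'C': np.random.randn(4),
                    'D': np.random.randn(4)})
    grouped = df.groupby('A')

    # select a subset of the value columns before aggregating (#383)
    grouped[['C', 'D']].mean()

    # a list of (name, function) tuples preserves output column order (#610)
    grouped['C'].agg([('mean', np.mean), ('max', np.max), ('min', np.min)])

    # per-column lists of functions yield hierarchical result columns (#642)
    grouped.agg({'C': np.mean, 'D': [np.mean, np.std]})

The complex-number patches above teach maybe_convert_objects, form_blocks and the block manager about complex values, so they are no longer relegated to an object block. A short sketch based on the constructor test above; at this point in the series the inferred dtype is complex64:

    import numpy as np
    from pandas import DataFrame

    df = DataFrame({'int': 1, 'float': 3., 'complex': 4j, 'object': 'foo'},
                   index=np.arange(10))
    df['complex'].dtype   # complex64 with this series applied, not object
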
From 0782990a2c51acb2aa4b8b13a496ab8813320b0f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 20:15:40 -0400 Subject: [PATCH 066/114] TST: verify consistently set group name, close #184 --- pandas/tests/test_groupby.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 524738e097330..b1c59bade0e95 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1770,6 +1770,32 @@ def bar(x): return np.std(x, ddof=1) 'D' : [foo, bar]}) assert_frame_equal(result, expected) + def test_set_group_name(self): + def f(group): + assert group.name is not None + return group + + def freduce(group): + assert group.name is not None + return group.sum() + + def foo(x): + return freduce(x) + + def _check_all(grouped): + # make sure all these work + grouped.apply(f) + grouped.aggregate(freduce) + grouped.aggregate({'C': freduce, 'D': freduce}) + grouped.transform(f) + + grouped['C'].apply(f) + grouped['C'].aggregate(freduce) + grouped['C'].aggregate([freduce, foo]) + grouped['C'].transform(f) + + _check_all(self.df.groupby('A')) + _check_all(self.df.groupby(['A', 'B'])) def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): From d66ac452ef628eb72d3118beefd611377f01749c Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 20:48:11 -0400 Subject: [PATCH 067/114] ENH: don't populate hash table in index engine if > 1e6 elements, to save memory and speed. close #1160 --- pandas/__init__.py | 1 + pandas/src/engines.pyx | 68 +++++++++++++++++++++++++++++------------- vb_suite/timeseries.py | 15 +++++++++- 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 5451ee750d685..94400d1172935 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -36,4 +36,5 @@ from pandas.tools.merge import merge, concat from pandas.tools.pivot import pivot_table, crosstab +from pandas.tools.plotting import scatter_matrix from pandas.tools.describe import value_range diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index 809de9e1015ad..b465dc3707705 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -44,11 +44,17 @@ def get_value_at(ndarray arr, object loc): def set_value_at(ndarray arr, object loc, object val): return util.set_value_at(arr, loc, val) + +# Don't populate hash tables in monotonic indexes larger than this +cdef int _SIZE_CUTOFF = 1000000 + + cdef class IndexEngine: cdef readonly: object index_weakref HashTable mapping + bint over_size_threshold cdef: bint unique, monotonic @@ -56,6 +62,9 @@ cdef class IndexEngine: def __init__(self, index_weakref): self.index_weakref = index_weakref + + self.over_size_threshold = len(index_weakref()) >= _SIZE_CUTOFF + self.initialized = 0 self.monotonic_check = 0 @@ -101,6 +110,15 @@ cdef class IndexEngine: if is_definitely_invalid_key(val): raise TypeError + if self.over_size_threshold and self.is_monotonic: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self._get_index_values() + loc = values.searchsorted(val, side='left') + if util.get_value_at(values, loc) != val: + raise KeyError(val) + return loc + self._ensure_mapping_populated() if not self.unique: return self._get_loc_duplicates(val) @@ -337,19 +355,17 @@ cdef class ObjectEngine(IndexEngine): cdef class DatetimeEngine(Int64Engine): - # cdef Int64HashTable mapping - def __contains__(self, object val): - self._ensure_mapping_populated() - - if 
util.is_datetime64_object(val): - return val.view('i8') in self.mapping - - if PyDateTime_Check(val): - key = np.datetime64(val) - return key.view('i8') in self.mapping + if self.over_size_threshold and self.is_monotonic: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self._get_index_values() + conv = _to_i8(val) + loc = values.searchsorted(conv, side='left') + return util.get_value_at(values, loc) == conv - return val in self.mapping + self._ensure_mapping_populated() + return _to_i8(val) in self.mapping cdef _get_index_values(self): return self.index_weakref().values.view('i8') @@ -363,13 +379,19 @@ cdef class DatetimeEngine(Int64Engine): # Welcome to the spaghetti factory + if self.over_size_threshold and self.is_monotonic: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self._get_index_values() + conv = _to_i8(val) + loc = values.searchsorted(conv, side='left') + if util.get_value_at(values, loc) != conv: + raise KeyError(val) + return loc + self._ensure_mapping_populated() if not self.unique: - if util.is_datetime64_object(val): - val = val.view('i8') - elif PyDateTime_Check(val): - val = np.datetime64(val) - val = val.view('i8') + val = _to_i8(val) return self._get_loc_duplicates(val) try: @@ -380,11 +402,7 @@ cdef class DatetimeEngine(Int64Engine): pass try: - if util.is_datetime64_object(val): - val = val.view('i8') - elif PyDateTime_Check(val): - val = np.datetime64(val) - val = val.view('i8') + val = _to_i8(val) return self.mapping.get_item(val) except TypeError: self._date_check_type(val) @@ -417,6 +435,14 @@ cdef class DatetimeEngine(Int64Engine): limit=limit) +cdef inline _to_i8(object val): + if util.is_datetime64_object(val): + val = unbox_datetime64_scalar(val) + elif PyDateTime_Check(val): + val = np.datetime64(val) + val = unbox_datetime64_scalar(val) + return val + # ctypedef fused idxvalue_t: # object # int diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py index 1fccea71f4ba9..98efe7917d977 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -9,11 +9,24 @@ rng = date_range('1/1/2000', periods=N, freq='min') except NameError: rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute()) - date_range = DateRange + def date_range(start=None, end=None, periods=None, freq=None): + return DateRange(start, end, periods=periods, offset=freq) ts = Series(np.random.randn(N), index=rng) """ +#---------------------------------------------------------------------- +# Lookup value in large time series, hash map population + +setup = common_setup + """ +rng = date_range('1/1/2000', periods=1500000, freq='s') +ts = Series(1, index=rng) +""" + +stmt = "ts[ts.index[len(ts) // 2]]; ts.index._cleanup()" +timeseries_large_lookup_value = Benchmark(stmt, setup, + start_date=datetime(2012, 1, 1)) + #---------------------------------------------------------------------- # Test slice minutely series From be5b5a4b30f77d31c891e69d341c29ed5e16db41 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 14 May 2012 21:01:02 -0400 Subject: [PATCH 068/114] ENH: support different 'bases' when resampling regular intervals like 5 minute, close #1119 --- pandas/core/generic.py | 12 +++-- pandas/tseries/resample.py | 73 ++++++++++++++------------- pandas/tseries/tests/test_resample.py | 9 ++++ 3 files changed, 54 insertions(+), 40 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5bd41423c9a2f..41b293c17461e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -155,9 +155,9 @@ 
def asfreq(self, freq, method=None, how=None): from pandas.tseries.resample import asfreq return asfreq(self, freq, method=method, how=how) - def resample(self, rule, how='mean', axis=0, - fill_method=None, closed='right', label='right', - convention=None, kind=None, loffset=None, limit=None): + def resample(self, rule, how='mean', axis=0, fill_method=None, + closed='right', label='right', convention=None, + kind=None, loffset=None, limit=None, base=0): """ Convenience method for frequency conversion and resampling of regular time-series data. @@ -175,12 +175,16 @@ def resample(self, rule, how='mean', axis=0, convention : {'start', 'end', 's', 'e'} loffset : timedelta Adjust the resampled time labels + base : int, default 0 + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0 """ from pandas.tseries.resample import TimeGrouper sampler = TimeGrouper(rule, label=label, closed=closed, how=how, axis=axis, kind=kind, loffset=loffset, fill_method=fill_method, convention=convention, - limit=limit) + limit=limit, base=base) return sampler.resample(self) def first(self, offset): diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 081375f8245ee..20ad5e0ced60f 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -37,7 +37,7 @@ class TimeGrouper(CustomGrouper): def __init__(self, freq='Min', closed='right', label='right', how='mean', begin=None, end=None, nperiods=None, axis=0, fill_method=None, limit=None, loffset=None, kind=None, - convention=None): + convention=None, base=0): self.freq = freq self.closed = closed self.label = label @@ -51,6 +51,7 @@ def __init__(self, freq='Min', closed='right', label='right', how='mean', self.how = how self.fill_method = fill_method self.limit = limit + self.base = base def resample(self, obj): axis = obj._get_axis(self.axis) @@ -88,9 +89,33 @@ def _get_time_grouper(self, obj): return binner, grouper def _get_time_bins(self, axis): - return _make_time_bins(axis, self.freq, begin=self.begin, - end=self.end, closed=self.closed, - label=self.label) + assert(isinstance(axis, DatetimeIndex)) + + if len(axis) == 0: + # TODO: Should we be a bit more careful here? + return [], [], [] + + first, last = _get_range_edges(axis, self.begin, self.end, self.freq, + closed=self.closed, base=self.base) + binner = DatetimeIndex(freq=self.freq, start=first, end=last) + + # a little hack + trimmed = False + if len(binner) > 2 and binner[-2] == axis[-1]: + binner = binner[:-1] + trimmed = True + + # general version, knowing nothing about relative frequencies + bins = lib.generate_bins_dt64(axis.asi8, binner.asi8, self.closed) + + if self.label == 'right': + labels = binner[1:] + elif not trimmed: + labels = binner[:-1] + else: + labels = binner + + return binner, bins, labels def _get_time_period_bins(self, axis): return _make_period_bins(axis, self.freq, begin=self.begin, @@ -210,36 +235,8 @@ def _make_period_bins(axis, freq, begin=None, end=None, return binner, bins, labels -def _make_time_bins(axis, freq, begin=None, end=None, - closed='right', label='right'): - assert(isinstance(axis, DatetimeIndex)) - - if len(axis) == 0: - # TODO: Should we be a bit more careful here? 
- return [], [], [] - - first, last = _get_range_edges(axis, begin, end, freq, closed=closed) - binner = DatetimeIndex(freq=freq, start=first, end=last) - - # a little hack - trimmed = False - if len(binner) > 2 and binner[-2] == axis[-1]: - binner = binner[:-1] - trimmed = True - - # general version, knowing nothing about relative frequencies - bins = lib.generate_bins_dt64(axis.asi8, binner.asi8, closed) - - if label == 'right': - labels = binner[1:] - elif not trimmed: - labels = binner[:-1] - else: - labels = binner - - return binner, bins, labels - -def _get_range_edges(axis, begin, end, offset, closed='left'): +def _get_range_edges(axis, begin, end, offset, closed='left', + base=0): from pandas.tseries.offsets import Tick, _delta_to_microseconds if isinstance(offset, basestring): offset = to_offset(offset) @@ -253,7 +250,7 @@ def _get_range_edges(axis, begin, end, offset, closed='left'): if ((day_micros % offset.micros) == 0 and begin is None and end is None): return _adjust_dates_anchored(axis[0], axis[-1], offset, - closed=closed) + closed=closed, base=base) if begin is None: if closed == 'left': @@ -271,12 +268,16 @@ def _get_range_edges(axis, begin, end, offset, closed='left'): return first, last -def _adjust_dates_anchored(first, last, offset, closed='right'): +def _adjust_dates_anchored(first, last, offset, closed='right', base=0): from pandas.tseries.tools import normalize_date start_day_micros = Timestamp(normalize_date(first)).value last_day_micros = Timestamp(normalize_date(last)).value + base_micros = (base % offset.n) * offset.micros / offset.n + start_day_micros += base_micros + last_day_micros += base_micros + foffset = (first.value - start_day_micros) % offset.micros loffset = (last.value - last_day_micros) % offset.micros diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 5b3613e57620d..875b5c94fa2e1 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -363,6 +363,15 @@ def test_resample_anchored_ticks(self): expected = ts.resample(freq, closed='left', label='left') assert_series_equal(result, expected) + def test_resample_base(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 02:00', freq='s') + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample('5min', base=2) + exp_rng = date_range('1/1/2000 00:02:00', '1/1/2000 02:02', + freq='5min') + self.assert_(resampled.index.equals(exp_rng)) + def test_resample_daily_anchored(self): rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T') ts = Series(np.random.randn(len(rng)), index=rng) From 8d581c8ed9f65b915bbc8a04b6b2c6744f8bf37d Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 08:32:26 -0400 Subject: [PATCH 069/114] VB: more convenience auto-updates --- vb_suite/make.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/vb_suite/make.py b/vb_suite/make.py index 0b9dd64690e40..52914a76a212d 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -79,15 +79,72 @@ def check_build(): pass def all(): - # clean() html() +def auto_update(): + msg = '' + try: + clean() + html() + latex() + upload() + uploadpdf() + except Exception, inst: + msg += str(inst) + '\n' + + if len(msg) == 0: + sendmail() + else: + sendmail(msg) + +def sendmail(err_msg=None): + from_name = 'drzoidberg@lambdafoundry.com' + to_name = 'dev@lambdafoundry.com' + + if err_msg is None: + msgstr = 'Daily vbench uploaded successfully' + subject = "VB: daily 
update successful" + else: + msgstr = err_msg + subject = "VB: daily update failed" + + import smtplib + from email.MIMEText import MIMEText + msg = MIMEText(msgstr) + msg['Subject'] = subject + msg['From'] = from_name + msg['To'] = to_name + + server_str, port, login, pwd = _get_credentials() + server = smtplib.SMTP(server_str, port) + server.ehlo() + server.starttls() + server.ehlo() + + server.login(login, pwd) + server.sendmail(from_name, to_name, msg.as_string()) + server.close() + +def _get_credentials(): + cred = '~/tmp/credentials' + with open(cred, 'r') as fh: + server, port, un, domain = fh.read().split(',') + port = int(port) + login = un + '@' + domain + '.com' + + import base64 + with open('~/tmp/cron_email_pwd', 'r') as fh: + pwd = base64.b64decode(fh.read()) + + return server, port, login, pwd + funcd = { 'html' : html, 'latex' : latex, 'clean' : clean, 'upload' : upload, 'uploadpdf' : uploadpdf, + 'auto_update' : auto_update, 'all' : all, } From 6e09dda045e9beb0d6a83a7456e45253edf2a881 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 08:55:50 -0400 Subject: [PATCH 070/114] VB: get from and to email addresses from config file --- vb_suite/make.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vb_suite/make.py b/vb_suite/make.py index 52914a76a212d..306ba0861ea8e 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -98,8 +98,7 @@ def auto_update(): sendmail(msg) def sendmail(err_msg=None): - from_name = 'drzoidberg@lambdafoundry.com' - to_name = 'dev@lambdafoundry.com' + from_name, to_name = _get_config() if err_msg is None: msgstr = 'Daily vbench uploaded successfully' @@ -138,6 +137,11 @@ def _get_credentials(): return server, port, login, pwd +def _get_config(): + with open('~/tmp/config', 'r') as fh: + from_name, to_name = fh.read().split(',') + return from_name, to_name + funcd = { 'html' : html, 'latex' : latex, From 31fefba954afc6b580d3157edc2217cbf457dac9 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 10:23:15 -0400 Subject: [PATCH 071/114] VB: removing cruft; getting config from user folders --- vb_suite/make.py | 56 +++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/vb_suite/make.py b/vb_suite/make.py index 306ba0861ea8e..e90c3525fc310 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -30,11 +30,6 @@ def upload(): os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' ':/usr/share/nginx/pandas/pandas-docs/vbench/ -essh') -def uploadpdf(): - 'push a copy to the sf site' - os.system('cd build/latex; scp pandas.pdf wesmckinn,pandas@web.sf.net' - ':/home/groups/p/pa/pandas/htdocs/') - def clean(): if os.path.exists('build'): shutil.rmtree('build') @@ -48,29 +43,10 @@ def html(): 'source build/html'): raise SystemExit("Building HTML failed.") -def latex(): - check_build() - if sys.platform != 'win32': - # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' - 'source build/latex'): - raise SystemExit("Building LaTeX failed.") - # Produce pdf. - - os.chdir('build/latex') - - # Call the makefile produced by sphinx... 
- if os.system('make'): - raise SystemExit("Rendering LaTeX failed.") - - os.chdir('../..') - else: - print 'latex build has not been tested on windows' - def check_build(): build_dirs = [ 'build', 'build/doctrees', 'build/html', - 'build/latex', 'build/plots', 'build/_static', + 'build/plots', 'build/_static', 'build/_templates'] for d in build_dirs: try: @@ -79,6 +55,7 @@ def check_build(): pass def all(): + clean() html() def auto_update(): @@ -86,9 +63,7 @@ def auto_update(): try: clean() html() - latex() upload() - uploadpdf() except Exception, inst: msg += str(inst) + '\n' @@ -121,33 +96,46 @@ def sendmail(err_msg=None): server.ehlo() server.login(login, pwd) - server.sendmail(from_name, to_name, msg.as_string()) - server.close() + try: + server.sendmail(from_name, to_name, msg.as_string()) + finally: + server.close() + +def _get_dir(): + import getpass + USERNAME = getpass.getuser() + if sys.platform == 'darwin': + HOME = '/Users/%s' % USERNAME + else: + HOME = '/home/%s' % USERNAME + + tmp_dir = '%s/tmp' % HOME + return tmp_dir def _get_credentials(): - cred = '~/tmp/credentials' + tmp_dir = _get_dir() + cred = '%s/credentials' % tmp_dir with open(cred, 'r') as fh: server, port, un, domain = fh.read().split(',') port = int(port) login = un + '@' + domain + '.com' import base64 - with open('~/tmp/cron_email_pwd', 'r') as fh: + with open('%s/cron_email_pwd' % tmp_dir, 'r') as fh: pwd = base64.b64decode(fh.read()) return server, port, login, pwd def _get_config(): - with open('~/tmp/config', 'r') as fh: + tmp_dir = _get_dir() + with open('%s/config' % tmp_dir, 'r') as fh: from_name, to_name = fh.read().split(',') return from_name, to_name funcd = { 'html' : html, - 'latex' : latex, 'clean' : clean, 'upload' : upload, - 'uploadpdf' : uploadpdf, 'auto_update' : auto_update, 'all' : all, } From d5b6b93672b3a680e29313ae5ea18ea1bff3a855 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 10:34:13 -0400 Subject: [PATCH 072/114] BUG: floor division for Python 3 --- pandas/tseries/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 20ad5e0ced60f..97025eafa5dc3 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -274,7 +274,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): start_day_micros = Timestamp(normalize_date(first)).value last_day_micros = Timestamp(normalize_date(last)).value - base_micros = (base % offset.n) * offset.micros / offset.n + base_micros = (base % offset.n) * offset.micros // offset.n start_day_micros += base_micros last_day_micros += base_micros From e275d76f6765023c3c47dcae37d7d8d1c3e93158 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 10:38:34 -0400 Subject: [PATCH 073/114] DOC: function for auto docs build --- doc/make.py | 106 ++++++++++++++++++++++++++++++++++++++--------- vb_suite/make.py | 7 +--- 2 files changed, 89 insertions(+), 24 deletions(-) diff --git a/doc/make.py b/doc/make.py index 8597b2efb7f7c..607b5e3938136 100755 --- a/doc/make.py +++ b/doc/make.py @@ -25,35 +25,29 @@ SPHINX_BUILD = 'sphinxbuild' -def sf(): - 'push a copy to the sf' - os.system('cd build/html; rsync -avz . wesmckinn,pandas@web.sf.net' - ':/home/groups/p/pa/pandas/htdocs/ -essh --cvs-exclude') - def upload_dev(): 'push a copy to the pydata dev directory' - os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh') + if os.system('cd build/html; rsync -avz . 
pandas@pandas.pydata.org' + ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'): + raise SystemExit('Upload to Pydata Dev failed') def upload_dev_pdf(): 'push a copy to the pydata dev directory' - os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/dev/') + if os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org' + ':/usr/share/nginx/pandas/pandas-docs/dev/'): + raise SystemExit('PDF upload to Pydata Dev failed') def upload_stable(): - 'push a copy to the pydata dev directory' - os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh') + 'push a copy to the pydata stable directory' + if os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' + ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'): + raise SystemExit('Upload to stable failed') def upload_stable_pdf(): 'push a copy to the pydata dev directory' - os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/stable/') - -def sfpdf(): - 'push a copy to the sf site' - os.system('cd build/latex; scp pandas.pdf wesmckinn,pandas@web.sf.net' - ':/home/groups/p/pa/pandas/htdocs/') + if os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org' + ':/usr/share/nginx/pandas/pandas-docs/stable/'): + raise SystemExit('PDF upload to stable failed') def clean(): if os.path.exists('build'): @@ -102,6 +96,79 @@ def all(): # clean() html() +def auto_dev_build(): + msg = '' + try: + clean() + html() + latex() + upload_dev() + upload_dev_pdf() + sendmail() + except (Exception, SystemExit), inst: + msg += str(inst) + '\n' + sendmail(msg) + +def sendmail(err_msg=None): + from_name, to_name = _get_config() + + if err_msg is None: + msgstr = 'Daily docs build completed successfully' + subject = "DOC: daily build successful" + else: + msgstr = err_msg + subject = "DOC: daily build failed" + + import smtplib + from email.MIMEText import MIMEText + msg = MIMEText(msgstr) + msg['Subject'] = subject + msg['From'] = from_name + msg['To'] = to_name + + server_str, port, login, pwd = _get_credentials() + server = smtplib.SMTP(server_str, port) + server.ehlo() + server.starttls() + server.ehlo() + + server.login(login, pwd) + try: + server.sendmail(from_name, to_name, msg.as_string()) + finally: + server.close() + +def _get_dir(): + import getpass + USERNAME = getpass.getuser() + if sys.platform == 'darwin': + HOME = '/Users/%s' % USERNAME + else: + HOME = '/home/%s' % USERNAME + + tmp_dir = '%s/tmp' % HOME + return tmp_dir + +def _get_credentials(): + tmp_dir = _get_dir() + cred = '%s/credentials' % tmp_dir + with open(cred, 'r') as fh: + server, port, un, domain = fh.read().split(',') + port = int(port) + login = un + '@' + domain + '.com' + + import base64 + with open('%s/cron_email_pwd' % tmp_dir, 'r') as fh: + pwd = base64.b64decode(fh.read()) + + return server, port, login, pwd + +def _get_config(): + tmp_dir = _get_dir() + with open('%s/config' % tmp_dir, 'r') as fh: + from_name, to_name = fh.read().split(',') + return from_name, to_name + funcd = { 'html' : html, 'upload_dev' : upload_dev, @@ -112,6 +179,7 @@ def all(): 'clean' : clean, 'sf' : sf, 'sfpdf' : sfpdf, + 'auto_dev' : auto_dev_build, 'all' : all, } diff --git a/vb_suite/make.py b/vb_suite/make.py index e90c3525fc310..c97b9c924150c 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -64,12 +64,9 @@ def auto_update(): clean() html() upload() - except Exception, inst: - msg += str(inst) + '\n' - 
- if len(msg) == 0: sendmail() - else: + except (Exception, SystemExit), inst: + msg += str(inst) + '\n' sendmail(msg) def sendmail(err_msg=None): From 18d9a13e183e10b8c7493fd23bbb4ac957b08a1f Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 10:39:51 -0400 Subject: [PATCH 074/114] DOC: removed lingering sourceforge references --- doc/make.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/make.py b/doc/make.py index 607b5e3938136..d8f2d9840cb68 100755 --- a/doc/make.py +++ b/doc/make.py @@ -177,8 +177,6 @@ def _get_config(): 'upload_stable_pdf' : upload_stable_pdf, 'latex' : latex, 'clean' : clean, - 'sf' : sf, - 'sfpdf' : sfpdf, 'auto_dev' : auto_dev_build, 'all' : all, } From 545e917c77b5876882e857ccbe3f9d9876c01c84 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 11:07:43 -0400 Subject: [PATCH 075/114] DOC: removed lingering timeRule keyword use --- doc/source/missing_data.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index d724938c29451..293832e23c414 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -204,8 +204,7 @@ for interpolation methods outside of the filling methods described above. :suppress: np.random.seed(123456) - ts = Series(randn(100), index=date_range('1/1/2000', periods=100, - timeRule='EOM')) + ts = Series(randn(100), index=date_range('1/1/2000', periods=100, freq='BM')) ts[20:40] = np.nan ts[60:80] = np.nan ts = ts.cumsum() From 40d9a3b265cdc0f994beb0a71789b8280d97c1e6 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 14:28:26 -0400 Subject: [PATCH 076/114] ENH: very basic ordered_merge with forward filling, not with multiple groups yet --- pandas/__init__.py | 2 +- pandas/core/frame.py | 4 +- pandas/src/join.pyx | 33 +++++++++++-- pandas/tools/merge.py | 79 +++++++++++++++++++++++++++++--- pandas/tools/tests/test_merge.py | 58 ++++++++++++++++++++--- 5 files changed, 158 insertions(+), 18 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 94400d1172935..7ef0ba10c1aa0 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -34,7 +34,7 @@ from pandas.io.pytables import HDFStore from pandas.util.testing import debug -from pandas.tools.merge import merge, concat +from pandas.tools.merge import merge, concat, ordered_merge from pandas.tools.pivot import pivot_table, crosstab from pandas.tools.plotting import scatter_matrix from pandas.tools.describe import value_range diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6048a6b678d3b..e0ffa17de9993 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -152,7 +152,7 @@ 3 foo 4 3 bar 8 >>> merge(A, B, left_on='lkey', right_on='rkey', how='outer') - lkey value.x rkey value.y + lkey value_x rkey value_y 0 bar 2 bar 6 1 bar 2 bar 8 2 baz 3 NaN NaN @@ -3511,7 +3511,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', @Appender(_merge_doc, indents=2) def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=True, - suffixes=('.x', '.y'), copy=True): + suffixes=('_x', '_y'), copy=True): from pandas.tools.merge import merge return merge(self, right, how=how, on=on, left_on=left_on, right_on=right_on, diff --git a/pandas/src/join.pyx b/pandas/src/join.pyx index 502635012ad39..a135a1c86126b 100644 --- a/pandas/src/join.pyx +++ b/pandas/src/join.pyx @@ -118,8 +118,9 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, return 
left_indexer, right_indexer + def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, - Py_ssize_t max_groups): + Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 ndarray[int64_t] left_count, right_count, left_sorter, right_sorter @@ -143,8 +144,8 @@ def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, # group 0 is the NA group cdef: - Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0 - Py_ssize_t offset + int64_t left_pos = 0, right_pos = 0 + Py_ssize_t offset, position = 0 # exclude the NA group left_pos = left_count[0] @@ -180,6 +181,8 @@ def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, return (_get_result_indexer(left_sorter, left_indexer), _get_result_indexer(right_sorter, right_indexer)) + + def _get_result_indexer(sorter, indexer): if indexer.dtype != np.int_: indexer = indexer.astype(np.int_) @@ -188,6 +191,30 @@ def _get_result_indexer(sorter, indexer): return res +def ffill_by_group(ndarray[int64_t] indexer, ndarray[int64_t] group_ids, + int64_t max_group): + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[int64_t] result, last_obs + int64_t gid, val + + result = np.empty(n, dtype=np.int64) + + last_obs = np.empty(max_group, dtype=np.int64) + last_obs.fill(-1) + + for i in range(n): + gid = group_ids[i] + val = indexer[i] + if val == -1: + result[i] = last_obs[gid] + else: + result[i] = val + last_obs[gid] = val + + return result + + @cython.boundscheck(False) @cython.wraparound(False) def join_sorter(ndarray[int64_t] index, Py_ssize_t ngroups): diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d6f65667929dd..680864ee542c1 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -24,7 +24,7 @@ @Appender(_merge_doc, indents=0) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=True, - suffixes=('.x', '.y'), copy=True): + suffixes=('_x', '_y'), copy=True): op = _MergeOperation(left, right, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, sort=sort, suffixes=suffixes, @@ -33,6 +33,19 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' +def ordered_merge(left, right, on=None, by=None, left_on=None, right_on=None, + left_index=False, right_index=False, fill_method=None, + suffixes=('_x', '_y')): + """ + + """ + op = _OrderedMerge(left, right, on=on, left_on=left_on, + right_on=right_on, left_index=left_index, + right_index=right_index, suffixes=suffixes, + fill_method=fill_method, by=by) + return op.get_result() + + # TODO: NA group handling # TODO: transformations?? 
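For orientation, a minimal sketch of the ordered_merge API this patch introduces, using the same toy frames as the TestOrderedMerge cases added further down (illustrative only, not part of the diff):

# Illustrative sketch -- mirrors TestOrderedMerge.test_ffill below.
from pandas import DataFrame, ordered_merge

left = DataFrame({'key': ['a', 'c', 'e'], 'lvalue': [1, 2., 3]})
right = DataFrame({'key': ['b', 'c', 'd', 'f'], 'rvalue': [1, 2, 3., 4]})

# outer join on the ordered key column, then forward-fill within each frame
result = ordered_merge(left, right, on='key', fill_method='ffill')
# expected, per the new test:
#   key  lvalue  rvalue
#    a     1.0     NaN
#    b     1.0     1.0
#    c     2.0     2.0
#    d     2.0     3.0
#    e     3.0     3.0
#    f     3.0     4.0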
@@ -47,7 +60,7 @@ class _MergeOperation(object): def __init__(self, left, right, how='inner', on=None, left_on=None, right_on=None, axis=1, left_index=False, right_index=False, sort=True, - suffixes=('.x', '.y'), copy=True): + suffixes=('_x', '_y'), copy=True): self.left = self.orig_left = left self.right = self.orig_right = right self.how = how @@ -325,6 +338,60 @@ def _get_group_keys(self): sort=self.sort) return left_group_key, right_group_key, max_groups + +class _OrderedMerge(_MergeOperation): + + def __init__(self, left, right, on=None, by=None, left_on=None, + right_on=None, axis=1, left_index=False, right_index=False, + suffixes=('_x', '_y'), copy=True, + fill_method=None): + + self.by = by + self.fill_method = fill_method + + _MergeOperation.__init__(self, left, right, on=on, left_on=left_on, + right_on=right_on, axis=axis, + left_index=left_index, + right_index=right_index, + how='outer', suffixes=suffixes, + sort=True # sorts when factorizing + ) + + + def get_result(self): + join_index, left_indexer, right_indexer = self._get_join_info() + + # this is a bit kludgy + ldata, rdata = self._get_merge_data() + + if self.fill_method == 'ffill': + # group_index, max_group = self._get_group_index() + + group_index = np.repeat(0, len(left_indexer)) + max_group = 1 + + left_join_indexer = lib.ffill_by_group(left_indexer, group_index, + max_group) + right_join_indexer = lib.ffill_by_group(right_indexer, group_index, + max_group) + else: + left_join_indexer = left_indexer + right_join_indexer = right_indexer + + join_op = _BlockJoinOperation([ldata, rdata], join_index, + [left_join_indexer, right_join_indexer], + axis=1, copy=self.copy) + + result_data = join_op.get_result() + result = DataFrame(result_data) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + return result + + def _get_group_index(self): + pass + def _get_multiindex_indexer(join_keys, index, sort=False): shape = [] labels = [] @@ -357,10 +424,6 @@ def _get_single_indexer(join_key, index, sort=False): return left_indexer, right_indexer -def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = lib.left_outer_join(y, x, max_groups) - return left_indexer, right_indexer - def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): join_index = left_ax left_indexer = None @@ -387,6 +450,10 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): return join_index, left_indexer, right_indexer +def _right_outer_join(x, y, max_groups): + right_indexer, left_indexer = lib.left_outer_join(y, x, max_groups) + return left_indexer, right_indexer + _join_functions = { 'inner' : lib.inner_join, 'left' : lib.left_outer_join, diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 4482e05295cd5..75d432af94e27 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -4,12 +4,13 @@ import unittest from numpy.random import randn +from numpy import nan import numpy as np import random from pandas import * from pandas.tseries.index import DatetimeIndex -from pandas.tools.merge import merge, concat +from pandas.tools.merge import merge, concat, ordered_merge from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal, rands) import pandas._tseries as lib @@ -463,8 +464,8 @@ def test_merge_overlap(self): merged = merge(self.left, self.left, on='key') exp_len = (self.left['key'].value_counts() ** 2).sum() self.assertEqual(len(merged), exp_len) - self.assert_('v1.x' in merged) - self.assert_('v1.y' in 
merged) + self.assert_('v1_x' in merged) + self.assert_('v1_y' in merged) def test_merge_different_column_key_names(self): left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], @@ -479,8 +480,8 @@ def test_merge_different_column_key_names(self): ['bar', 'baz', 'foo', 'foo', 'foo', 'foo', np.nan]) assert_almost_equal(merged['rkey'], ['bar', np.nan, 'foo', 'foo', 'foo', 'foo', 'qux']) - assert_almost_equal(merged['value.x'], [2, 3, 1, 1, 4, 4, np.nan]) - assert_almost_equal(merged['value.y'], [6, np.nan, 5, 8, 5, 8, 7]) + assert_almost_equal(merged['value_x'], [2, 3, 1, 1, 4, 4, np.nan]) + assert_almost_equal(merged['value_y'], [6, np.nan, 5, 8, 5, 8, 7]) def test_merge_nocopy(self): left = DataFrame({'a' : 0, 'b' : 1}, index=range(10)) @@ -656,7 +657,7 @@ def test_left_merge_na_buglet(self): tm.assert_frame_equal(merged, expected) def _check_join(left, right, result, join_col, how='left', - lsuffix='.x', rsuffix='.y'): + lsuffix='_x', rsuffix='_y'): # some smoke tests for c in join_col: @@ -1248,6 +1249,51 @@ def test_mixed_type_join_with_suffix(self): # it works! mn.join(cn, rsuffix='_right') + +class TestOrderedMerge(unittest.TestCase): + + def setUp(self): + self.left = DataFrame({'key': ['a', 'c', 'e'], + 'lvalue': [1, 2., 3]}) + + self.right = DataFrame({'key': ['b', 'c', 'd', 'f'], + 'rvalue': [1, 2, 3., 4]}) + + # GH #813 + + def test_basic(self): + result = ordered_merge(self.left, self.right, on='key') + expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'], + 'lvalue': [1, nan, 2, nan, 3, nan], + 'rvalue': [nan, 1, 2, 3, nan, 4]}) + + assert_frame_equal(result, expected) + + def test_ffill(self): + result = ordered_merge(self.left, self.right, on='key', fill_method='ffill') + expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'], + 'lvalue': [1., 1, 2, 2, 3, 3.], + 'rvalue': [nan, 1, 2, 3, 3, 4]}) + assert_frame_equal(result, expected) + + def test_multigroup(self): + raise nose.SkipTest + left = concat([self.left, self.left], ignore_index=True) + right = concat([self.right, self.right], ignore_index=True) + + left['group'] = ['a'] * 3 + ['b'] * 3 + right['group'] = ['a'] * 4 + ['b'] * 4 + + result = ordered_merge(left, right, on='key', by='group', + fill_method='ffill') + + expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'], + 'lvalue': [1., 1, 2, 2, 3, 3.], + 'rvalue': [nan, 1, 2, 3, 3, 4]}) + expected['group'] = ['a'] * 6 + ['b'] * 6 + + assert_frame_equal(result, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], From 69229e756ca0cff7e142bd5a489680ab7103415a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 15:18:18 -0400 Subject: [PATCH 077/114] ENH: add group-wise merge capability to ordered_merge, unit tests, close #813 --- RELEASE.rst | 2 + pandas/src/join.pyx | 21 ++++++ pandas/tools/merge.py | 126 +++++++++++++++++++++++++------ pandas/tools/tests/test_merge.py | 23 +++--- vb_suite/join_merge.py | 17 +++++ 5 files changed, 155 insertions(+), 34 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 24ab824914b98..1c9b24815ad0d 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -48,6 +48,8 @@ pandas 0.8.0 a particular order (#610) - Can pass dicts with lists of functions or dicts to GroupBy aggregate to do much more flexible multiple function aggregation (#642) + - New ordered_merge functions for merging DataFrames with ordered + data. 
Also supports group-wise merging for panel data (#813) **Improvements to existing features** diff --git a/pandas/src/join.pyx b/pandas/src/join.pyx index a135a1c86126b..06d00fe2e16f7 100644 --- a/pandas/src/join.pyx +++ b/pandas/src/join.pyx @@ -191,6 +191,27 @@ def _get_result_indexer(sorter, indexer): return res + +def ffill_indexer(ndarray[int64_t] indexer): + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[int64_t] result + int64_t val, last_obs + + result = np.empty(n, dtype=np.int64) + last_obs = -1 + + for i in range(n): + val = indexer[i] + if val == -1: + result[i] = last_obs + else: + result[i] = val + last_obs = val + + return result + + def ffill_by_group(ndarray[int64_t] indexer, ndarray[int64_t] group_ids, int64_t max_group): cdef: diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 680864ee542c1..eaf833f47dd7b 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -33,17 +33,107 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' -def ordered_merge(left, right, on=None, by=None, left_on=None, right_on=None, - left_index=False, right_index=False, fill_method=None, - suffixes=('_x', '_y')): - """ +def ordered_merge(left, right, on=None, left_by=None, right_by=None, + left_on=None, right_on=None, + fill_method=None, suffixes=('_x', '_y')): + """Perform merge with optional filling/interpolation designed for ordered + data like time series data. Optionally perform group-wise merge (see + examples) + + Parameters + ---------- + left : DataFrame + right : DataFrame + fill_method : {'ffill', None}, default None + Interpolation method for data + on : label or list + Field names to join on. Must be found in both DataFrames. + left_on : label or list, or array-like + Field names to join on in left DataFrame. Can be a vector or list of + vectors of the length of the DataFrame to use a particular vector as + the join key instead of columns + right_on : label or list, or array-like + Field names to join on in right DataFrame or vector/list of vectors per + left_on docs + left_by : column name or list of column names + Group left DataFrame by group columns and merge piece by piece with + right DataFrame + right_by : column name or list of column names + Group right DataFrame by group columns and merge piece by piece with + left DataFrame + suffixes : 2-length sequence (tuple, list, ...) 
+ Suffix to apply to overlapping column names in the left and right + side, respectively + + Examples + -------- + >>> A >>> B + key lvalue group key rvalue + 0 a 1 a 0 b 1 + 1 c 2 a 1 c 2 + 2 e 3 a 2 d 3 + 3 a 1 b + 4 c 2 b + 5 e 3 b + + >>> ordered_merge(A, B, fill_method='ffill', left_by='group') + key lvalue group rvalue + 0 a 1 a NaN + 1 b 1 a 1 + 2 c 2 a 2 + 3 d 2 a 3 + 4 e 3 a 3 + 5 f 3 a 4 + 6 a 1 b NaN + 7 b 1 b 1 + 8 c 2 b 2 + 9 d 2 b 3 + 10 e 3 b 3 + 11 f 3 b 4 + Returns + ------- + merged : DataFrame """ - op = _OrderedMerge(left, right, on=on, left_on=left_on, - right_on=right_on, left_index=left_index, - right_index=right_index, suffixes=suffixes, - fill_method=fill_method, by=by) - return op.get_result() + def _merger(x, y): + op = _OrderedMerge(x, y, on=on, left_on=left_on, right_on=right_on, + # left_index=left_index, right_index=right_index, + suffixes=suffixes, fill_method=fill_method) + return op.get_result() + + if left_by is not None and right_by is not None: + raise ValueError('Can only group either left or right frames') + elif left_by is not None: + if not isinstance(left_by, (list, tuple)): + left_by = [left_by] + pieces = [] + for key, xpiece in left.groupby(left_by): + merged = _merger(xpiece, right) + for k in left_by: + # May have passed ndarray + try: + if k in merged: + merged[k] = key + except: + pass + pieces.append(merged) + return concat(pieces, ignore_index=True) + elif right_by is not None: + if not isinstance(right_by, (list, tuple)): + right_by = [right_by] + pieces = [] + for key, ypiece in right.groupby(right_by): + merged = _merger(left, ypiece) + for k in right_by: + try: + if k in merged: + merged[k] = key + except: + pass + pieces.append(merged) + return concat(pieces, ignore_index=True) + else: + return _merger(left, right) @@ -158,9 +248,6 @@ def _get_join_info(self): # max groups = largest possible number of distinct groups left_key, right_key, max_groups = self._get_group_keys() - # left_key = com._ensure_int64(left_key) - # right_key = com._ensure_int64(right_key) - join_func = _join_functions[self.how] left_indexer, right_indexer = join_func(left_key, right_key, max_groups) @@ -346,7 +433,6 @@ def __init__(self, left, right, on=None, by=None, left_on=None, suffixes=('_x', '_y'), copy=True, fill_method=None): - self.by = by self.fill_method = fill_method _MergeOperation.__init__(self, left, right, on=on, left_on=left_on, @@ -365,15 +451,8 @@ def get_result(self): ldata, rdata = self._get_merge_data() if self.fill_method == 'ffill': - # group_index, max_group = self._get_group_index() - - group_index = np.repeat(0, len(left_indexer)) - max_group = 1 - - left_join_indexer = lib.ffill_by_group(left_indexer, group_index, - max_group) - right_join_indexer = lib.ffill_by_group(right_indexer, group_index, - max_group) + left_join_indexer = lib.ffill_indexer(left_indexer) + right_join_indexer = lib.ffill_indexer(right_indexer) else: left_join_indexer = left_indexer right_join_indexer = right_indexer @@ -389,9 +468,6 @@ def get_result(self): return result - def _get_group_index(self): - pass - def _get_multiindex_indexer(join_keys, index, sort=False): shape = [] labels = [] diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 75d432af94e27..701acfddf5ea5 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1277,22 +1277,27 @@ def test_ffill(self): assert_frame_equal(result, expected) def test_multigroup(self): - raise nose.SkipTest left = concat([self.left, self.left], 
ignore_index=True) - right = concat([self.right, self.right], ignore_index=True) + # right = concat([self.right, self.right], ignore_index=True) left['group'] = ['a'] * 3 + ['b'] * 3 - right['group'] = ['a'] * 4 + ['b'] * 4 + # right['group'] = ['a'] * 4 + ['b'] * 4 - result = ordered_merge(left, right, on='key', by='group', + result = ordered_merge(left, self.right, on='key', left_by='group', fill_method='ffill') - - expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'], - 'lvalue': [1., 1, 2, 2, 3, 3.], - 'rvalue': [nan, 1, 2, 3, 3, 4]}) + expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'] * 2, + 'lvalue': [1., 1, 2, 2, 3, 3.] * 2, + 'rvalue': [nan, 1, 2, 3, 3, 4] * 2}) expected['group'] = ['a'] * 6 + ['b'] * 6 - assert_frame_equal(result, expected) + assert_frame_equal(result, expected.ix[:, result.columns]) + + result2 = ordered_merge(self.right, left, on='key', right_by='group', + fill_method='ffill') + assert_frame_equal(result, result2.ix[:, result.columns]) + + result = ordered_merge(left, self.right, on='key', left_by='group') + self.assert_(result['group'].notnull().all()) if __name__ == '__main__': import nose diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py index 657ca398f01bb..07fcfcb5ddc14 100644 --- a/vb_suite/join_merge.py +++ b/vb_suite/join_merge.py @@ -150,3 +150,20 @@ def sample(values, k): concat_series_axis1 = Benchmark('concat(pieces, axis=1)', setup, start_date=datetime(2012, 2, 27)) + +#---------------------------------------------------------------------- +# Ordered merge + +setup = common_setup + """ +groups = np.array([rands(10) for _ in xrange(10)], dtype='O') + +left = DataFrame({'group': groups.repeat(5000), + 'key' : np.tile(np.arange(0, 10000, 2), 10), + 'lvalue': np.random.randn(50000)}) + +right = DataFrame({'key' : np.arange(10000), + 'rvalue' : np.random.randn(10000)}) + +""" + +stmt = "ordered_merge(left, right, on='key', left_by='group')" From 9e2142bed5bf3f684a520e82e59250b56c51279f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 15:54:10 -0400 Subject: [PATCH 078/114] BUG: ensure_platform_int actually makes lots of copies --- pandas/src/generate_code.py | 14 +++++++++++++- pandas/src/generated.pyx | 22 ++++++++++++---------- pandas/tests/test_tseries.py | 5 +++++ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 5c3c3784f2277..643d70831074f 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -44,6 +44,18 @@ # initialize numpy import_array() import_ufunc() + +cdef int PLATFORM_INT = ( np.arange(0, dtype=np.int_)).descr.type_num + +cpdef ensure_platform_int(object arr): + if util.is_array(arr): + if ( arr).descr.type_num == PLATFORM_INT: + return arr + else: + return arr.astype(np.int_) + else: + return np.array(arr, dtype=np.int_) + """ take_1d_template = """@cython.wraparound(False) @@ -828,7 +840,7 @@ def outer_join_indexer_%(name)s(ndarray[%(c_type)s] left, ('float64', 'FLOAT64', 'float64'), ('int32', 'INT32', 'int32'), ('int64', 'INT64', 'int64'), - ('platform_int', 'INT', 'int_'), + # ('platform_int', 'INT', 'int_'), ('object', 'OBJECT', 'object_'), ] diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index 96f989d8cd506..9a275c806c0eb 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -43,6 +43,18 @@ PyDateTime_IMPORT import_array() import_ufunc() +cdef int PLATFORM_INT = ( np.arange(0, dtype=np.int_)).descr.type_num + +cpdef ensure_platform_int(object arr): + if 
util.is_array(arr): + if ( arr).descr.type_num == PLATFORM_INT: + return arr + else: + return arr.astype(np.int_) + else: + return np.array(arr, dtype=np.int_) + + @cython.wraparound(False) @cython.boundscheck(False) cpdef map_indices_float64(ndarray[float64_t] index): @@ -3337,16 +3349,6 @@ cpdef ensure_int64(object arr): return np.array(arr, dtype=np.int64) -cpdef ensure_platform_int(object arr): - if util.is_array(arr): - if ( arr).descr.type_num == NPY_INT: - return arr - else: - return arr.astype(np.int_) - else: - return np.array(arr, dtype=np.int_) - - cpdef ensure_object(object arr): if util.is_array(arr): if ( arr).descr.type_num == NPY_OBJECT: diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index a25dc60053a18..318f782371f73 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -163,6 +163,11 @@ def test_groupsort_indexer(): expected = np.lexsort((b, a)) assert(np.array_equal(result, expected)) +def test_ensure_platform_int(): + arr = np.arange(100) + + result = algos.ensure_platform_int(arr) + assert(result is arr) def test_duplicated_with_nas(): keys = [0, 1, nan, 0, 2, nan] From 5891ad5d51c74eabc7c4148d6c0d9304d83b8cff Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 15:58:52 -0400 Subject: [PATCH 079/114] RLS: release notes, close #1239 --- RELEASE.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE.rst b/RELEASE.rst index 1c9b24815ad0d..9f70654436a43 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -73,6 +73,8 @@ pandas 0.8.0 (#1073) - Change BDay (business day) to not normalize dates by default - Remove deprecated DataMatrix name + - Default merge suffixes for overlap now have underscores instead of periods + to facilitate tab completion, etc. (#1239) **Bug fixes** From 42d1c90cbba49af60f193d1d537442a398ca499d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 16:11:28 -0400 Subject: [PATCH 080/114] BLD: 32-bit compat fixes per #1242 --- pandas/io/pytables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index dec9616cfba8c..f41952d399a69 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -850,7 +850,8 @@ def _read_panel_table(self, group, where=None): key = major.labels * K + minor.labels if len(unique(key)) == len(key): - sorter, _ = lib.groupsort_indexer(key, J * K) + sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K) + sorter = com._ensure_platform_int(sorter) # the data need to be sorted sorted_values = values.take(sorter, axis=0) @@ -879,6 +880,7 @@ def _read_panel_table(self, group, where=None): unique_tuples = _asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) + indexer = com._ensure_platform_int(indexer) new_index = long_index.take(indexer) new_values = lp.values.take(indexer, axis=0) From f1c6c893aaff7ff1b565cae8fea798acbdf39ce3 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 16:16:13 -0400 Subject: [PATCH 081/114] ENH: add keys() method to DataFrame, close #1240 --- pandas/core/frame.py | 3 +++ pandas/tests/test_frame.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e0ffa17de9993..ce870c7441caf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -530,6 +530,9 @@ def __iter__(self): """ return iter(self.columns) + def keys(self): + return self.columns + def iteritems(self): """Iterator over (column, series) pairs""" return ((k, self[k]) for k in self.columns) 
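For orientation, a minimal sketch of what the new method gives you (illustrative only): keys() simply hands back the columns Index, so dict-style helpers line up with the existing column iteration.

# Illustrative sketch -- matches the new test_keys assertion below.
from pandas import DataFrame

df = DataFrame({'A': [1, 2], 'B': [3, 4]})
df.keys() is df.columns        # True: same Index object, as the test asserts
list(df) == list(df.keys())    # True: __iter__ already walks the columns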
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ff25c7cde01a8..a1f317f342b1b 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1322,6 +1322,9 @@ def test_set_columns(self): self.assertRaises(Exception, setattr, self.mixed_frame, 'columns', cols[::2]) + def test_keys(self): + self.assert_(self.frame.keys() is self.frame.columns) + def test_column_contains_typeerror(self): try: self.frame.columns in self.frame From 6e8bbeda2c7c9fd307b83150edabed448fb29ab2 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 16:21:10 -0400 Subject: [PATCH 082/114] DOC: release notes --- RELEASE.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.rst b/RELEASE.rst index 9f70654436a43..61e10b964a895 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -50,6 +50,7 @@ pandas 0.8.0 much more flexible multiple function aggregation (#642) - New ordered_merge functions for merging DataFrames with ordered data. Also supports group-wise merging for panel data (#813) + - Add keys() method to DataFrame **Improvements to existing features** From e50c7d8f2ec3ca2f252899666c5068992dbc5c15 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 8 May 2012 00:54:05 -0400 Subject: [PATCH 083/114] TST: test cases for replace method. #929 --- pandas/tests/test_frame.py | 25 +++++++++++++++++++++++++ pandas/tests/test_panel.py | 37 +++++++++++++++++++++++++++++++++++++ pandas/tests/test_series.py | 23 +++++++++++++++++++++++ 3 files changed, 85 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a1f317f342b1b..9bfe029b1bce1 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5578,6 +5578,31 @@ def test_bool_raises_value_error_1069(self): df = DataFrame([1, 2, 3]) self.failUnlessRaises(ValueError, lambda: bool(df)) + def test_replace(self): + N = 100 + df = DataFrame(np.fabs(np.random.randn(len(N), 5)), + index=tm.makeDataIndex(N)) + df.ix[:5, 0] = np.nan + df[6:10, 1] = 'foo' + df[20:30, 2] = 'bar' + + rs = df.replace([np.nan, 'foo', 'bar'], -1) + self.assert_((rs.ix[:5, 0] == -1).all()) + self.assert_((rs.ix[6:10, 1] == -1).all()) + self.assert_((rs.ix[20:30, 2] == -1).all()) + self.assert_((df >= 0).all()) + + rs = df.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + self.assert_((rs.ix[:5, 0] == -1).all()) + self.assert_((rs.ix[6:10, 1] == -2).all()) + self.assert_((rs.ix[20:30, 2] == -3).all()) + self.assert_((df >= 0).all()) + + df.replace([np.nan, 'foo', 'bar'], -1, inplace=True) + self.assert_((df.ix[:5, 0] == -1).all()) + self.assert_((df.ix[6:10, 1] == -1).all()) + self.assert_((df.ix[20:30, 2] == -1).all()) + if __name__ == '__main__': # unittest.main() import nose diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index e1441e9d7f4ff..8a2652f751f68 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1294,6 +1294,43 @@ def test_pivot(self): # corner case, empty df = pivot(np.array([]), np.array([]), np.array([])) + def test_replace(self): + N = 100 + df1 = DataFrame(np.fabs(np.random.randn(len(N), 5)), + index=tm.makeDataIndex(N)) + df1.ix[:5, 0] = np.nan + df1[6:10, 1] = 'foo' + df1[20:30, 2] = 'bar' + + df2 = DataFrame(np.fabs(np.random.randn(len(N), 5)), + index=tm.makeDataIndex(N)) + df2.ix[:5, 0] = 'bar' + df2[6:10, 1] = np.nan + df2[20:30, 2] = 'foo' + + panel = Panel({'x' : df1, 'y' : df2}) + rs = panel.replace([np.nan, 'foo', 'bar'], -1) + self.assert_((rs.ix[:, :5, 0] == -1).all()) + self.assert_((rs.ix[:, 6:10, 1] == -1).all()) + 
self.assert_((rs.ix[:, 20:30, 2] == -1).all()) + self.assert_((panel >= 0).all()) + + rs = panel.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + self.assert_((rs.ix[0, :5, 0] == -1).all()) + self.assert_((rs.ix[0, 6:10, 1] == -2).all()) + self.assert_((rs.ix[0, 20:30, 2] == -3).all()) + + self.assert_((rs.ix[1, :5, 0] == -3).all()) + self.assert_((rs.ix[1, 6:10, 1] == -1).all()) + self.assert_((rs.ix[1, 20:30, 2] == -2).all()) + + self.assert_((panel >= 0).all()) + + panel.replace([np.nan, 'foo', 'bar'], -1, inplace=True) + self.assert_((panel.ix[:5, 0] == -1).all()) + self.assert_((panel.ix[6:10, 1] == -1).all()) + self.assert_((panel.ix[20:30, 2] == -1).all()) + def test_monotonic(): pos = np.array([1, 2, 3, 5]) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 4b8248dcc7bcd..bd1b557c9c15a 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2701,6 +2701,29 @@ def test_timeseries_coercion(self): self.assert_(isinstance(ser, TimeSeries)) self.assert_(isinstance(ser.index, DatetimeIndex)) + def test_replace(self): + N = 100 + ser = Series(np.fabs(np.random.randn(len(N))), tm.makeDataIndex(N)) + ser[:5] = np.nan + ser[6:10] = 'foo' + ser[20:30] = 'bar' + + rs = ser.replace([np.nan, 'foo', 'bar'], -1) + self.assert_((rs[:5] == -1).all()) + self.assert_((rs[6:10] == -1).all()) + self.assert_((rs[20:30] == -1).all()) + self.assert_((ser >= 0).all()) + + rs = ser.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + self.assert_((rs[:5] == -1).all()) + self.assert_((rs[6:10] == -2).all()) + self.assert_((rs[20:30] == -3).all()) + self.assert_((ser >= 0).all()) + + ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) + self.assert_((ser[:5] == -1).all()) + self.assert_((ser[6:10] == -1).all()) + self.assert_((ser[20:30] == -1).all()) def test_repeat(self): s = Series(np.random.randn(3), index=['a', 'b', 'c']) From b0e13c105879357d61f7f804dfe7b7afd956fc1b Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 8 May 2012 17:54:29 -0400 Subject: [PATCH 084/114] ENH: Series.replace #929 --- pandas/core/common.py | 30 +++++++--- pandas/core/series.py | 108 ++++++++++++++++++++++++++++++++++-- pandas/tests/test_series.py | 26 ++++++++- 3 files changed, 148 insertions(+), 16 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 8449359edf520..cb1e457fa1c0a 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -372,7 +372,7 @@ def wrapper(arr, mask, limit=None): _backfill_1d_datetime = _interp_wrapper(_algos.backfill_inplace_int64, np.int64) _backfill_2d_datetime = _interp_wrapper(_algos.backfill_2d_inplace_int64, np.int64) -def pad_1d(values, limit=None): +def pad_1d(values, limit=None, mask=None): if is_float_dtype(values): _method = _algos.pad_inplace_float64 elif is_datetime64_dtype(values): @@ -382,9 +382,12 @@ def pad_1d(values, limit=None): else: # pragma: no cover raise ValueError('Invalid dtype for padding') - _method(values, isnull(values).view(np.uint8), limit=limit) + if mask is None: + mask = isnull(values) + mask = mask.view(np.uint8) + _method(values, mask, limit=limit) -def backfill_1d(values, limit=None): +def backfill_1d(values, limit=None, mask=None): if is_float_dtype(values): _method = _algos.backfill_inplace_float64 elif is_datetime64_dtype(values): @@ -394,9 +397,13 @@ def backfill_1d(values, limit=None): else: # pragma: no cover raise ValueError('Invalid dtype for padding') - _method(values, isnull(values).view(np.uint8), limit=limit) + if mask is None: + mask = isnull(values) + mask = 
mask.view(np.uint8) -def pad_2d(values, limit=None): + _method(values, mask, limit=limit) + +def pad_2d(values, limit=None, mask=None): if is_float_dtype(values): _method = _algos.pad_2d_inplace_float64 elif is_datetime64_dtype(values): @@ -406,9 +413,13 @@ def pad_2d(values, limit=None): else: # pragma: no cover raise ValueError('Invalid dtype for padding') - _method(values, isnull(values).view(np.uint8), limit=limit) + if mask is None: + mask = isnull(values) + mask = mask.view(np.uint8) + + _method(values, mask, limit=limit) -def backfill_2d(values, limit=None): +def backfill_2d(values, limit=None, mask=None): if is_float_dtype(values): _method = _algos.backfill_2d_inplace_float64 elif is_datetime64_dtype(values): @@ -418,8 +429,11 @@ def backfill_2d(values, limit=None): else: # pragma: no cover raise ValueError('Invalid dtype for padding') - _method(values, isnull(values).view(np.uint8), limit=limit) + if mask is None: + mask = isnull(values) + mask = mask.view(np.uint8) + _method(values, mask, limit=limit) def _consensus_name_attr(objs): name = objs[0].name diff --git a/pandas/core/series.py b/pandas/core/series.py index aff454220f8b6..ce3da197bf4fd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2149,11 +2149,7 @@ def fillna(self, value=None, method='pad', inplace=False, if method is None: # pragma: no cover raise ValueError('must specify a fill method') - method = com._clean_fill_method(method) - if method == 'pad': - fill_f = com.pad_1d - elif method == 'backfill': - fill_f = com.backfill_1d + fill_f = _get_fill_func(method) if inplace: values = self.values @@ -2169,6 +2165,91 @@ def fillna(self, value=None, method='pad', inplace=False, return result + + def replace(self, to_replace=None, value=None, method='pad', inplace=False, + limit=None): + """ + Replace arbitrary values in a Series + + Parameters + ---------- + to_replace : list or dict, default None + list of values to be replaced or dict of replacement values + value : anything + if to_replace is a list then value is the replacement value + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + inplace : boolean, default False + If True, fill the Series in place. Note: this will modify any other + views on this Series, for example a column in a DataFrame. 
Returns + a reference to the filled object, which is self if inplace=True + limit : int, default None + Maximum size gap to forward or backward fill + + Notes + ----- + replace does not distinguish between NaN and None + + See also + -------- + fillna, reindex, asfreq + + Returns + ------- + replaced : Series + """ + result = self.copy() if not inplace else self + single_val = False + + def _rep_one(s, to_rep, v): # replace single value + m = _mask_missing(s, to_rep) + np.putmask(s, m, v) + return s + + def _rep_dict(rs, to_rep): # replace {[src] -> dest} + + dd = {} # group by unique destination value + [dd.setdefault(d, []).append(s) for s, d in to_rep.iteritems()] + + for d, sset in dd.iteritems(): # now replace by each dest + rs = _rep_one(rs, sset, d) + return rs + + if isinstance(to_replace, dict): + return _rep_dict(result, to_replace) + + if isinstance(to_replace, (list, np.ndarray)): + + if isinstance(value, (list, np.ndarray)): # check same length + + vl, rl = len(value), len(to_replace) + if vl == rl: + return _rep_dict(result, dict(zip(to_replace, value))) + raise ValueError('Got %d to replace but %d values' % (rl, vl)) + + elif value is not None: # otherwise all replaced with same value + + return _rep_one(result, to_replace, value) + + else: # method + if method is None: # pragma: no cover + raise ValueError('must specify a fill method') + fill_f = _get_fill_func(method) + + mask = _mask_missing(result, to_replace) + fill_f(result.values, limit=limit, mask=mask) + + if not inplace: + result = Series(result.values, index=self.index, + name=self.name) + return result + + + raise ValueError('Unrecognized to_replace type %s' % + type(to_replace)) + def isin(self, values): """ Return boolean vector showing whether each element in the Series is @@ -2620,6 +2701,23 @@ def _resolve_offset(freq, kwds): return offset +def _get_fill_func(method): + method = com._clean_fill_method(method) + if method == 'pad': + fill_f = com.pad_1d + elif method == 'backfill': + fill_f = com.backfill_1d + return fill_f + +def _mask_missing(series, missing_values): + missing_values = np.array(list(missing_values), dtype=object) + if isnull(missing_values).any(): + missing_values = missing_values[notnull(missing_values)] + mask = isnull(series) | series.isin(missing_values) + else: + mask = series.isin(missing_values) + return mask + #---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index bd1b557c9c15a..c52eb06b698f8 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2703,23 +2703,43 @@ def test_timeseries_coercion(self): def test_replace(self): N = 100 - ser = Series(np.fabs(np.random.randn(len(N))), tm.makeDataIndex(N)) + ser = Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), + dtype=object) ser[:5] = np.nan ser[6:10] = 'foo' ser[20:30] = 'bar' + # replace list with a single value rs = ser.replace([np.nan, 'foo', 'bar'], -1) + self.assert_((rs[:5] == -1).all()) self.assert_((rs[6:10] == -1).all()) self.assert_((rs[20:30] == -1).all()) - self.assert_((ser >= 0).all()) + self.assert_((isnull(ser[:5])).all()) + # replace with different values rs = ser.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + self.assert_((rs[:5] == -1).all()) self.assert_((rs[6:10] == -2).all()) self.assert_((rs[20:30] == -3).all()) - self.assert_((ser >= 0).all()) + self.assert_((isnull(ser[:5])).all()) + + # replace with different values with 2 lists + rs2 = 
ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) + assert_series_equal(rs, rs2) + + # replace with forward fill not considering np.nan missing + s2 = ser.copy() + s2[5] = np.nan + rs3 = s2.replace(['foo', 'bar']) + self.assert_(isnull(rs3[6])) + + # replace with back fill considering np.nan as missing + rs4 = ser.replace([np.nan, 'foo', 'bar'], method='bfill') + assert_almost_equal(rs4[4], ser[5]) + # replace inplace ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) self.assert_((ser[:5] == -1).all()) self.assert_((ser[6:10] == -1).all()) From b7546b219e73247008ee6ff4e77065708720d38c Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 9 May 2012 10:12:51 -0400 Subject: [PATCH 085/114] ENH: DataFrame.replace and cython replace. Only works for floats and ints. Need to generate datetime64 and object versions. --- pandas/core/frame.py | 131 +++++++- pandas/core/internals.py | 37 ++- pandas/core/series.py | 8 +- pandas/src/codegen_replace.py | 187 +++++++++++ pandas/src/codegen_template.py | 408 +++++++++++++++++++++++ pandas/src/generate_code.py | 12 +- pandas/src/replace.pyx | 575 +++++++++++++++++++++++++++++++++ pandas/src/tseries.pyx | 34 ++ pandas/tests/test_frame.py | 18 ++ 9 files changed, 1394 insertions(+), 16 deletions(-) create mode 100644 pandas/src/codegen_replace.py create mode 100644 pandas/src/codegen_template.py create mode 100644 pandas/src/replace.pyx diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce870c7441caf..b699de61e5e3b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2699,6 +2699,136 @@ def fillna(self, value=None, method='pad', axis=0, inplace=False, else: return self._constructor(new_data) + def replace(self, to_replace, value=None, method='pad', axis=0, + inplace=False, limit=None): + """ + Replace values given in 'to_replace' with 'value' or using 'method' + + Parameters + ---------- + value : scalar or dict, default None + Value to use to fill holes (e.g. 0), alternately a dict of values + specifying which value to use for each column (columns not in the + dict will not be filled) + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + axis : {0, 1}, default 0 + 0: fill column-by-column + 1: fill row-by-row + inplace : boolean, default False + If True, fill the DataFrame in place. Note: this will modify any + other views on this DataFrame, like if you took a no-copy slice of + an existing DataFrame, for example a column in a DataFrame. 
Returns + a reference to the filled object, which is self if inplace=True + limit : int, default None + Maximum size gap to forward or backward fill + + See also + -------- + reindex, asfreq + + Returns + ------- + filled : DataFrame + """ + self._consolidate_inplace() + + if value is None: + if self._is_mixed_type and axis == 1: + return self.T.replace(to_replace, method=method, limit=limit).T + + method = com._clean_fill_method(method) + + if isinstance(to_replace, dict): + rs = self if inplace else self.copy() + for k, v in to_replace.iteritems(): + if k in rs: + rs[k].replace(v, method=method, limit=limit, + inplace=True) + return rs + + else: + new_blocks = [] + for block in self._data.blocks: + newb = block.interpolate(method, axis=axis, + limit=limit, inplace=inplace, + missing=to_replace) + new_blocks.append(newb) + new_data = BlockManager(new_blocks, self._data.axes) + else: + # Float type values + if len(self.columns) == 0: + return self + + if np.isscalar(to_replace): + if np.isscalar(value): # np.nan -> 0 + new_data = self._data.replace(to_replace, value, + inplace=inplace) + if inplace: + self._data = new_data + return self + else: + return self._constructor(new_data) + + elif isinstance(value, dict): # np.nan -> {'A' : 0, 'B' : -1} + return self._replace_dest_dict(to_replace, value, inplace) + + elif isinstance(to_replace, dict): + if np.isscalar(value): # {'A' : np.nan, 'B' : ''} -> 0 + return self._replace_src_dict(to_replace, value, inplace) + elif isinstance(value, dict): # {'A' : np.nan} -> {'A' : 0} + return self._replace_both_dict(to_replace, value, inplace) + else: + raise ValueError('Fill value must be scalar or dict') + return rs + + elif isinstance(to_replace, (list, np.ndarray)): + # [np.nan, ''] -> [0, 'missing'] + if isinstance(value, (list, np.ndarray)): + if len(to_replace) != len(value): + raise ValueError('Replacement lists must match ' + 'in length. 
Expecting %d got %d ' % + (len(to_replace), len(value))) + + new_data = self._data if inplace else self.copy()._data + for s, d in zip(to_replace, value): + new_data = new_data.replace(s, d, inplace=True) + + else: # [np.nan, ''] -> 0 + new_data = self._data.replace(to_replace, value, + inplace=inplace) + if inplace: + self._data = new_data + return self + else: + return self._constructor(new_data) + else: + raise ValueError('Invalid to_replace type: %s' % + type(to_replace)) + + def _replace_dest_dict(self, to_replace, value, inplace): + rs = self if inplace else self.copy() + for k, v in value.iteritems(): + if k in rs: + rs[k].replace(to_replace, v, inplace=True) + return rs + + def _replace_src_dict(self, to_replace, value, inplace): + rs = self if inplace else self.copy() + for k, src in to_replace.iteritems(): + if k in rs: + rs[k].replace(src, value, inplace=True) + return rs + + def _replace_both_dict(self, to_replace, value, inplace): + rs = self if inplace else self.copy() + for c, src in to_replace.iteritems(): + if c in value and c in rs: + rs[c].replace(src, value[c], inplace=True) + return rs + #---------------------------------------------------------------------- # Rename @@ -4475,7 +4605,6 @@ def _is_sequence(x): except Exception: return False - def install_ipython_completers(): # pragma: no cover """Register the DataFrame type with IPython's tab completion machinery, so that it knows about accessing column names as attributes.""" diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 77969ffa26f17..1bd644e9d5a8e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -208,16 +208,18 @@ def split_block_at(self, item): return left_block, right_block def fillna(self, value, inplace=False): - new_values = self.values if inplace else self.values.copy() - mask = com.isnull(new_values.ravel()) - new_values.flat[mask] = value + return self.replace(np.nan, value, inplace) + def replace(self, to_replace, value, inplace=False): + new_values = self.values if inplace else self.values.copy() + lib.replace(new_values, to_replace, value) if inplace: return self else: return make_block(new_values, self.items, self.ref_items) - def interpolate(self, method='pad', axis=0, inplace=False, limit=None): + def interpolate(self, method='pad', axis=0, inplace=False, + limit=None, missing=None): values = self.values if inplace else self.values.copy() if values.ndim != 2: @@ -225,10 +227,15 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None): transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + if missing is None: + mask = None + else: # todo create faster fill func without masking + mask = _mask_missing(values, missing) + if method == 'pad': - com.pad_2d(transf(values), limit=limit) + com.pad_2d(transf(values), limit=limit, mask=mask) else: - com.backfill_2d(transf(values), limit=limit) + com.backfill_2d(transf(values), limit=limit, mask=mask) return make_block(values, self.items, self.ref_items) @@ -239,6 +246,18 @@ def take(self, indexer, axis=1, fill_value=np.nan): fill_value=fill_value) return make_block(new_values, self.items, self.ref_items) +def _mask_missing(array, missing_values): + missing_values = np.array(list(missing_values), dtype=object) + if com.isnull(missing_values).any(): + mask = com.isnull(array) + missing_values = missing_values[com.notnull(missing_values)] + for v in missing_values: + if mask is None: + mask = array == missing_values + else: + mask |= array == missing_values + return mask + 
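For orientation, the Series-level replace() calling conventions that these block-level helpers support, as exercised by the tests added in the previous patch (illustrative sketch only):

# Illustrative sketch -- mirrors TestSeries.test_replace in the prior patch.
import numpy as np
from pandas import Series

ser = Series([np.nan, 'foo', 'bar', 1.5], dtype=object)

ser.replace([np.nan, 'foo', 'bar'], -1)             # every listed value -> -1
ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})     # per-source mapping
ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])   # paired lists, same result
ser.replace([np.nan, 'foo', 'bar'], method='bfill') # no value: fill from next valid obs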
#------------------------------------------------------------------------------- # Is this even possible? @@ -949,10 +968,10 @@ def add_suffix(self, suffix): return self.rename_items(f) def fillna(self, value, inplace=False): - """ + return self.replace(np.nan, value, inplace) - """ - new_blocks = [b.fillna(value, inplace=inplace) + def replace(self, to_replace, value, inplace=False): + new_blocks = [b.replace(to_replace, value, inplace=inplace) if b._can_hold_na else b for b in self.blocks] if inplace: diff --git a/pandas/core/series.py b/pandas/core/series.py index ce3da197bf4fd..06c5a9ca1eeae 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2166,14 +2166,14 @@ def fillna(self, value=None, method='pad', inplace=False, return result - def replace(self, to_replace=None, value=None, method='pad', inplace=False, + def replace(self, to_replace, value=None, method='pad', inplace=False, limit=None): """ Replace arbitrary values in a Series Parameters ---------- - to_replace : list or dict, default None + to_replace : list or dict list of values to be replaced or dict of replacement values value : anything if to_replace is a list then value is the replacement value @@ -2204,8 +2204,7 @@ def replace(self, to_replace=None, value=None, method='pad', inplace=False, single_val = False def _rep_one(s, to_rep, v): # replace single value - m = _mask_missing(s, to_rep) - np.putmask(s, m, v) + lib.replace(s.values, to_rep, v) return s def _rep_dict(rs, to_rep): # replace {[src] -> dest} @@ -2223,7 +2222,6 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} if isinstance(to_replace, (list, np.ndarray)): if isinstance(value, (list, np.ndarray)): # check same length - vl, rl = len(value), len(to_replace) if vl == rl: return _rep_dict(result, dict(zip(to_replace, value))) diff --git a/pandas/src/codegen_replace.py b/pandas/src/codegen_replace.py new file mode 100644 index 0000000000000..12593d8d38bd3 --- /dev/null +++ b/pandas/src/codegen_replace.py @@ -0,0 +1,187 @@ +from copy import deepcopy +import numpy as np + +#------------------------------------------------------------------------ +# Replace : slightly adapted from bottleneck + +loop_template = 'for iINDEX%d in range(nINDEX%d):' +indent = ' ' +#replace_op = ('%sif mask[INDEXALL]:\n' +# '%s a[INDEXALL] = new%s') + +nonna_op = ('%sai = a[INDEXALL]\n' + '%sif ai == old:\n' + '%s a[INDEXALL] = new%s') +na_op = ('%sai = a[INDEXALL]\n' + '%sif ai != ai:\n' + '%s a[INDEXALL] = new%s') + +generic_top = """ +@cython.boundscheck(False) +@cython.wraparound(False) +def NAME_NDIMd_DTYPE_axisAXIS(np.ndarray[np.DTYPE_t, ndim=NDIM] a, + double old, double new): + "replace (inplace) specified elements of NDIMd array of dtype=DTYPE." 
+ cdef np.DTYPE_t ai +""" + +int_check = """\ + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') +""" + +def float_loop(ndims=3, type_suffix=''): + loop = {} + for n in range(1, ndims + 1): + loop_str = indent + 'if old==old: \n' + for i in range(n): # for i in range: + loop_str += indent * (i + 2) + (loop_template % (i, i)) + '\n' + + dent = indent * (n + 2) + loop_str += nonna_op % (dent, dent, dent, type_suffix) + + loop_str += '\n' + indent + 'else:\n' + for i in range(n): # for i in range: + loop_str += indent * (i + 2) + (loop_template % (i, i)) + '\n' + + dent = indent * (n + 2) + loop_str += na_op % (dent, dent, dent, type_suffix) + + loop[n] = loop_str + '\n' + return loop + +def int_loop(ndims=3, type_suffix='int'): + loop = {} + for n in range(1, ndims + 1): + loop_str = indent + 'if old==old: \n' + int_check + for i in range(n): # for i in range: + loop_str += indent * (i + 2) + (loop_template % (i, i)) + '\n' + + dent = indent * (n + 2) + loop_str += nonna_op % (dent, dent, dent, type_suffix) + loop[n] = loop_str + '\n' + return loop + + +# float type functions +floats = {} +floats['dtypes'] = ['float32', 'float64'] +floats['axisNone'] = True +floats['force_output_dtype'] = False +floats['reuse_non_nan_func'] = False +floats['top'] = generic_top +floats['loop'] = float_loop() + +# int type functions +ints = deepcopy(floats) +ints['dtypes'] = ['int32', 'int64'] +ints['top'] = generic_top + """ + cdef np.DTYPE_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') +""" +ints['loop'] = int_loop() + +# Slow, unaccelerated ndim/dtype -------------------------------------------- +def replace(arr, old, new): + "Slow replace (inplace) used for unaccelerated ndim/dtype combinations." + if type(arr) is not np.ndarray: + raise TypeError("`arr` must be a numpy array.") + if not issubclass(arr.dtype.type, np.inexact): + if int(old) != old: + raise ValueError("Cannot safely cast `old` to int.") + if int(new) != new: + raise ValueError("Cannot safely cast `new` to int.") + if old != old: + mask = np.isnan(arr) + else: + mask = arr == old + np.putmask(arr, mask, new) + +slow = {} +slow['name'] = "replace" +slow['signature'] = "arr, old, new" +slow['func'] = "slow_replace(arr, old, new)" + +replace = {} +replace['name'] = 'replace' +replace['is_reducing_function'] = False +replace['cdef_output'] = False +replace['slow'] = slow +replace['templates'] = {} +replace['templates']['float_None'] = floats +replace['templates']['int_None'] = ints +replace['pyx_file'] = 'replace.pyx' + +replace['main'] = '''"replace auto-generated from template" + +def replace(arr, old, new): + """ + Replace (inplace) given scalar values of an array with new values. + + similar to putmask but faster + + Parameters + ---------- + arr : numpy.ndarray + The input array, which is also the output array since this functions + works inplace. + old : scalar + new : scalar + All masked elements in `arr` will be replaced by `new`. + + Returns + ------- + None, the operation is inplace. + """ + func = replace_selector(arr) + if np.isscalar(old): + return func(arr, old, new) + else: + for o in old: + arr = func(arr, o, new) + return arr + +def replace_selector(arr): + """ + Return replace function and array that matches `arr`. + + Under the hood Bottleneck uses a separate replace() Cython function for + each combination of ndim and dtype. 
A lot of the overhead in bn.replace() + is inselecting the low level function to use. + + You can get rid of the overhead by doing all this before you, for example, + enter an inner loop, by using this function. + + Parameters + ---------- + arr : numpy.ndarray + Input array. + + Returns + ------- + func : function + The replace() function that matches the number of dimensions and dtype + of the input array. + """ + axis = None + if type(arr) is not np.ndarray: + raise TypeError("`arr` must be a numpy array.") + cdef int ndim = PyArray_NDIM(arr) + cdef int dtype = PyArray_TYPE(arr) + cdef tuple key = (ndim, dtype, axis) + try: + func = replace_dict[key] + except KeyError: + try: + func = replace_slow_dict[axis] + except KeyError: + tup = (str(ndim), str(arr.dtype), str(axis)) + raise TypeError("Unsupported ndim/dtype/axis (%s/%s/%s)." % tup) + return func +''' diff --git a/pandas/src/codegen_template.py b/pandas/src/codegen_template.py new file mode 100644 index 0000000000000..a43d936e7cf74 --- /dev/null +++ b/pandas/src/codegen_template.py @@ -0,0 +1,408 @@ +"Copied from bottleneck: Turn templates into Cython pyx files." +import os.path + +def template(func): + "'Convert template dictionary `func` to a pyx file.'\n" + codes = [] + codes.append(func['main']) + select = Selector(func['name']) + for key in func['templates']: + f = func['templates'][key] + code = subtemplate(name=func['name'], + top=f['top'], + loop=f['loop'], + axisNone=f['axisNone'], + dtypes=f['dtypes'], + force_output_dtype=f['force_output_dtype'], + reuse_non_nan_func=f['reuse_non_nan_func'], + is_reducing_function=func['is_reducing_function'], + cdef_output=func['cdef_output'], + select=select) + codes.append(code) + codes.append('\n' + str(select)) + if 'slow' in func: + if func['slow'] is not None: + slow = func['slow'] + code1 = slow_selector(slow['name']) + code2 = slow_functions(slow['name'], + slow['signature'], + slow['func']) + codes.append(code2) + codes.append(code1) + modpath = os.path.dirname(__file__) + fid = open(os.path.join(modpath, func['pyx_file']), 'w') + fid.write(''.join(codes)) + fid.close() + +def subtemplate(name, top, loop, axisNone, dtypes, force_output_dtype, + reuse_non_nan_func, is_reducing_function, cdef_output, select): + "Assemble template" + ndims = sorted(loop.keys()) + funcs = [] + for ndim in ndims: + if axisNone: + axes = [None] + else: + axes = list(range(ndim)) + for dtype in dtypes: + for axis in axes: + + if reuse_non_nan_func: + + select.append(ndim, dtype, axis, True) + + else: + + # Code template + func = top + + # loop + if force_output_dtype is not False: + ydtype = force_output_dtype + else: + ydtype = dtype + func += loop_cdef(ndim, ydtype, axis, is_reducing_function, + cdef_output) + func += looper(loop[ndim], ndim, axis) + + # name, ndim, dtype, axis + func = func.replace('NAME', name) + func = func.replace('NDIM', str(ndim)) + func = func.replace('DTYPE', dtype) + func = func.replace('AXIS', str(axis)) + + funcs.append(func) + select.append(ndim, dtype, axis) + + return ''.join(funcs) + +def looper(loop, ndim, axis): + """ + Given loop template, expand index markers for given `ndim` and `axis`. 
+ + Parameters + ---------- + loop : str + Code of loop where the following template markers will be expanded + (example given is for 3d input, similarly for other nd): + + ================= ================================================= + INDEXALL Replace with i0, i1, i2 + INDEXPOP If axis=1, e.g., replace with i0, i2 + INDEXN If N=1, e.g., replace with 1 + INDEXREPLACE|exp| If exp = 'k - window' and axis=1, e.g., replace + with i0, k - window, i2 + NREPLACE|exp| If exp = 'n - window' and axis=1, e.g., replace + with n0, n - window, n2 + ================= ================================================= + ndim : int + Number of dimensions in the loop. + axis : {int, None} + Axis over which the loop is evaluated. + + Returns + ------- + code : str + Code for the loop with templated index markers expanded. + + Examples + -------- + Make a 3d loop template: + + >>> loop = ''' + .... for iINDEX0 in range(nINDEX0): + .... for iINDEX1 in range(nINDEX1): + .... amin = MAXDTYPE + .... for iINDEX2 in range(nINDEX2): + .... ai = a[INDEXALL] + .... if ai <= amin: + .... amin = ai + .... y[INDEXPOP] = amin + .... ''' + + Import the looper function: + + >>> from bottleneck.src.template.template import looper + + Make a loop over axis=0: + + >>> print(looper(loop, ndim=3, axis=0)) + for i1 in range(n1): + for i2 in range(n2): + amin = MAXDTYPE + for i0 in range(n0): + ai = a[i0, i1, i2] + if ai <= amin: + amin = ai + y[i1, i2] = amin + + Make a loop over axis=1: + + >>> print(looper(loop, ndim=3, axis=1)) + for i0 in range(n0): + for i2 in range(n2): + amin = MAXDTYPE + for i1 in range(n1): + ai = a[i0, i1, i2] + if ai <= amin: + amin = ai + y[i0, i2] = amin + + Make a loop over axis=2: + + >>> print(looper(loop, ndim=3, axis=2)) + for i0 in range(n0): + for i1 in range(n1): + amin = MAXDTYPE + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai <= amin: + amin = ai + y[i0, i1] = amin + + """ + + if ndim < 1: + raise ValueError("ndim(=%d) must be and integer greater than 0" % ndim) + if axis is not None: + if axis < 0: + raise ValueError("`axis` must be a non-negative integer or None") + elif axis >= ndim: + raise ValueError("`axis` must be less then `ndim`") + + # INDEXALL + INDEXALL = ', '.join('i' + str(i) for i in range(ndim)) + code = loop.replace('INDEXALL', INDEXALL) + + # INDEXPOP + idx = list(range(ndim)) + if axis is not None: + idx.pop(axis) + INDEXPOP = ', '.join(['i' + str(i) for i in idx]) + code = code.replace('INDEXPOP', INDEXPOP) + + # INDEXN + idx = list(range(ndim)) + if axis is not None: + idxpop = idx.pop(axis) + idx.append(idxpop) + for i, j in enumerate(idx): + code = code.replace('INDEX%d' % i, '%d' % j) + + # INDEXREPLACE|x| + mark = 'INDEXREPLACE|' + nreplace = code.count(mark) + if (nreplace > 0) and (axis is None): + raise ValueError("`INDEXREPLACE` cannot be used when axis is None.") + while mark in code: + idx0 = code.index(mark) + idx1 = idx0 + len(mark) + idx2 = idx1 + code[idx1:].index('|') + if (idx0 >= idx1) or (idx1 >= idx2): + raise RuntimeError("Parsing error or poorly formatted input.") + replacement = code[idx1:idx2] + idx = ['i' + str(i) for i in range(ndim)] + idx[axis] = replacement + idx = ', '.join(idx) + code = code[:idx0] + idx + code[idx2+1:] + + # NREPLACE|x| + mark = 'NREPLACE|' + nreplace = code.count(mark) + # TODO: reuse while loop above, only difference is 'i' --> 'n' + while mark in code: + idx0 = code.index(mark) + idx1 = idx0 + len(mark) + idx2 = idx1 + code[idx1:].index('|') + if (idx0 >= idx1) or (idx1 >= idx2): + raise RuntimeError("Parsing 
error or poorly formatted input.") + replacement = code[idx1:idx2] + idx = ['n' + str(i) for i in range(ndim)] + idx[axis] = replacement + idx = ', '.join(idx) + code = code[:idx0] + idx + code[idx2+1:] + + return code + +def loop_cdef(ndim, dtype, axis, is_reducing_function, cdef_output=True): + """ + String of code that initializes variables needed in a for loop. + + The output string contains code for: index array counters, one for each + dimension (cdef Py_size_t i0, i1, i2, ....); the length along each + dimension of the input array, `a` (cdef Py_ssize_t n0 = a.shape[0],...); + the initialized, empty output array, `y`. + + Parameters + ---------- + ndim = int + Number of dimensions. + dtype : str + The data type of the output. Used for initilizing the empty output + array, `y`. + is_reducing_function : bool + If True then remove the dimension given by `axis` when initializing + the output array, `y`. + cdef_output : bool, optional + If False then only initialize indices (i) and shapes (n). If True + (default) then also intialized output array `y`. + + Returns + ------- + cdefs : str + String of code to use to initialize variables needed for loop. + + Examples + -------- + Define parameters: + + >>> ndim = 3 + >>> dtype = 'float64' + >>> axis = 1 + >>> is_reducing_function = True + + Import loop_cdef: + + >>> from bottleneck.src.template.template import loop_cdef + + Make loop initialization code: + + >>> print(loop_cdef(ndim, dtype, axis, is_reducing_function)) + cdef Py_ssize_t i0, i1, i2 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + Py_ssize_t n0 = dim[0] + Py_ssize_t n1 = dim[1] + Py_ssize_t n2 = dim[2] + cdef np.npy_intp *dims = [n0, n2] + cdef np.ndarray[np.float64_t, ndim=2] y = PyArray_EMPTY(2, dims, + NPY_float64, 0) + + Repeat, but this time make the output non-reducing: + + >>> is_reducing_function = False + >>> print(loop_cdef(ndim, dtype, axis, is_reducing_function)) + cdef Py_ssize_t i0, i1, i2 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + Py_ssize_t n0 = dim[0] + Py_ssize_t n1 = dim[1] + Py_ssize_t n2 = dim[2] + cdef np.npy_intp *dims = [n0, n1, n2] + cdef np.ndarray[np.float64_t, ndim=3] y = PyArray_EMPTY(3, dims, + NPY_float64, 0) + + """ + + if ndim < 1: + raise ValueError("ndim(=%d) must be and integer greater than 0" % ndim) + if axis is not None: + if axis < 0: + raise ValueError("`axis` must be a non-negative integer or None") + elif axis >= ndim: + raise ValueError("`axis` must be less then `ndim`") + + tab = ' ' + cdefs = [] + + # cdef loop indices + idx = ', '.join('i'+str(i) for i in range(ndim)) + cdefs.append(tab + 'cdef Py_ssize_t ' + idx) + + # Length along each dimension + cdefs.append(tab + "cdef np.npy_intp *dim") + cdefs.append(tab + "dim = PyArray_DIMS(a)") + for dim in range(ndim): + cdefs.append(tab + "cdef Py_ssize_t n%d = dim[%d]" % (dim, dim)) + + if not cdef_output: + return '\n'.join(cdefs) + '\n' + + # cdef initialize output + if is_reducing_function: + if (ndim > 1) and (axis is not None): + idx = list(range(ndim)) + del idx[axis] + ns = ', '.join(['n'+str(i) for i in idx]) + cdefs.append("%scdef np.npy_intp *dims = [%s]" % (tab, ns)) + y = "%scdef np.ndarray[np.%s_t, ndim=%d] " + y += "y = PyArray_EMPTY(%d, dims," + y += "\n NPY_%s, 0)" + cdefs.append(y % (tab, dtype, ndim-1, ndim-1, dtype)) + else: + idx = list(range(ndim)) + ns = ', '.join('n'+str(i) for i in idx) + cdefs.append("%scdef np.npy_intp *dims = [%s]" % (tab, ns)) + y = "%scdef np.ndarray[np.%s_t, ndim=%d] " + y += "y = PyArray_EMPTY(%d, dims," + y += "\n NPY_%s, 0)" + 
cdefs.append(y % (tab, dtype, ndim, ndim, dtype)) + + return '\n'.join(cdefs) + '\n' + +class Selector(object): + "String of code for dictionary that maps dtype to cython function." + + def __init__(self, name): + self.name = name + self.data = [] + + def append(self, ndim, dtype, axis, reuse=False): + self.data.append((ndim, dtype, axis, reuse)) + + def __str__(self): + fmt = "%s_dict[(%s, NPY_%s, %s)] = %s_%sd_%s_axis%s" + src = [] + src.append("cdef dict %s_dict = {}" % self.name) + for ndim, dtype, axis, reuse in self.data: + name = self.name + if reuse: + name = name.replace('nan', '') + if (ndim == 1) and (axis is None): + tup = (self.name, str(ndim), str(dtype), str(0), + name, str(ndim), str(dtype), str(axis)) + src.append(fmt % tup) + tup = (self.name, str(ndim), str(dtype), str(axis), + name, str(ndim), str(dtype), str(axis)) + src.append(fmt % tup) + return '\n'.join(src) + +def slow_selector(name, maxaxis=32): + "String of code for slow function mapping dictionary." + axes = list(range(maxaxis+1)) + [None] + src = ['\n'] + src.append("cdef dict %s_slow_dict = {}" % name) + fmt = "%s_slow_dict[%s] = %s_slow_axis%s" + for axis in axes: + tup = 2 * (name, str(axis)) + src.append(fmt % tup) + return '\n'.join(src) + +def slow_functions(name, signature, func, maxaxis=32): + "String of code for slow functions." + axes = list(range(maxaxis+1)) + [None] + tab = ' ' + sig = "def %s_slow_axis%s(%s):" + doc = '%s"Unaccelerated (slow) %s along axis %s."' + function = "%sreturn %s\n" + src = ['\n'] + for axis in axes: + + axis = str(axis) + + # signature + code = sig % (name, axis, signature) + code = code.replace('AXIS', axis) + src.append(code) + + # docstring + code = doc % (tab, name, axis) + code = code.replace('AXIS', axis) + src.append(code) + + # function + code = function % (tab, func) + code = code.replace('AXIS', axis) + src.append(code) + + return '\n'.join(src) diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 643d70831074f..eb458dd8508e3 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -1,4 +1,7 @@ +import os from pandas.util.py3compat import StringIO +from pandas.src.codegen_template import template as pyx_template +from pandas.src.codegen_replace import replace header = """ cimport numpy as np @@ -867,6 +870,10 @@ def put2d_%(name)s_%(dest_type)s(ndarray[%(c_type)s, ndim=2, cast=True] values, out[i] = values[j, loc] """ + +#------------------------------------------------------------------------- +# Generators + def generate_put_functions(): function_list = [ ('float64', 'float64_t', 'object'), @@ -936,7 +943,9 @@ def generate_from_template(template, ndim=1, exclude=None): # templates_1d_datetime = [take_1d_template] # templates_2d_datetime = [take_2d_axis0_template, # take_2d_axis1_template] - +def codegen_pyx(funcs): + for func in funcs: + pyx_template(funcs[func]) def generate_take_cython_file(path='generated.pyx'): with open(path, 'w') as f: @@ -960,6 +969,7 @@ def generate_take_cython_file(path='generated.pyx'): print >> f, generate_ensure_dtypes() # print >> f, generate_put_functions() + codegen_pyx({'replace': replace}) if __name__ == '__main__': generate_take_cython_file() diff --git a/pandas/src/replace.pyx b/pandas/src/replace.pyx new file mode 100644 index 0000000000000..c785518e9ab83 --- /dev/null +++ b/pandas/src/replace.pyx @@ -0,0 +1,575 @@ +"replace auto-generated from template" + +def replace(arr, old, new): + """ + Replace (inplace) given scalar values of an array with new values. 
+ + similar to putmask but faster + + Parameters + ---------- + arr : numpy.ndarray + The input array, which is also the output array since this functions + works inplace. + old : scalar + new : scalar + All masked elements in `arr` will be replaced by `new`. + + Returns + ------- + None, the operation is inplace. + """ + func = replace_selector(arr) + if np.isscalar(old): + return func(arr, old, new) + else: + for o in old: + arr = func(arr, o, new) + return arr + +def replace_selector(arr): + """ + Return replace function and array that matches `arr`. + + Under the hood Bottleneck uses a separate replace() Cython function for + each combination of ndim and dtype. A lot of the overhead in bn.replace() + is inselecting the low level function to use. + + You can get rid of the overhead by doing all this before you, for example, + enter an inner loop, by using this function. + + Parameters + ---------- + arr : numpy.ndarray + Input array. + + Returns + ------- + func : function + The replace() function that matches the number of dimensions and dtype + of the input array. + """ + axis = None + if type(arr) is not np.ndarray: + raise TypeError("`arr` must be a numpy array.") + cdef int ndim = PyArray_NDIM(arr) + cdef int dtype = PyArray_TYPE(arr) + cdef tuple key = (ndim, dtype, axis) + try: + func = replace_dict[key] + except KeyError: + try: + func = replace_slow_dict[axis] + except KeyError: + tup = (str(ndim), str(arr.dtype), str(axis)) + raise TypeError("Unsupported ndim/dtype/axis (%s/%s/%s)." % tup) + return func + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_1d_int32_axisNone(np.ndarray[np.int32_t, ndim=1] a, + double old, double new): + "replace (inplace) specified elements of 1d array of dtype=int32." + cdef np.int32_t ai + + cdef np.int32_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + cdef Py_ssize_t i0 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + if old==old: + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + for i0 in range(n0): + ai = a[i0] + if ai == old: + a[i0] = newint + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_1d_int64_axisNone(np.ndarray[np.int64_t, ndim=1] a, + double old, double new): + "replace (inplace) specified elements of 1d array of dtype=int64." + cdef np.int64_t ai + + cdef np.int64_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + cdef Py_ssize_t i0 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + if old==old: + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + for i0 in range(n0): + ai = a[i0] + if ai == old: + a[i0] = newint + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_2d_int32_axisNone(np.ndarray[np.int32_t, ndim=2] a, + double old, double new): + "replace (inplace) specified elements of 2d array of dtype=int32." 
+ cdef np.int32_t ai + + cdef np.int32_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + cdef Py_ssize_t i0, i1 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + if old==old: + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + for i0 in range(n0): + for i1 in range(n1): + ai = a[i0, i1] + if ai == old: + a[i0, i1] = newint + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_2d_int64_axisNone(np.ndarray[np.int64_t, ndim=2] a, + double old, double new): + "replace (inplace) specified elements of 2d array of dtype=int64." + cdef np.int64_t ai + + cdef np.int64_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + cdef Py_ssize_t i0, i1 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + if old==old: + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + for i0 in range(n0): + for i1 in range(n1): + ai = a[i0, i1] + if ai == old: + a[i0, i1] = newint + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_3d_int32_axisNone(np.ndarray[np.int32_t, ndim=3] a, + double old, double new): + "replace (inplace) specified elements of 3d array of dtype=int32." + cdef np.int32_t ai + + cdef np.int32_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + cdef Py_ssize_t i0, i1, i2 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + cdef Py_ssize_t n2 = dim[2] + if old==old: + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + for i0 in range(n0): + for i1 in range(n1): + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai == old: + a[i0, i1, i2] = newint + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_3d_int64_axisNone(np.ndarray[np.int64_t, ndim=3] a, + double old, double new): + "replace (inplace) specified elements of 3d array of dtype=int64." + cdef np.int64_t ai + + cdef np.int64_t oldint, newint + newint = new + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + cdef Py_ssize_t i0, i1, i2 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + cdef Py_ssize_t n2 = dim[2] + if old==old: + oldint = old + newint = new + if oldint != old: + raise ValueError('Cannot safely cast `old` to int.') + if newint != new: + raise ValueError('Cannot safely cast `new` to int.') + for i0 in range(n0): + for i1 in range(n1): + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai == old: + a[i0, i1, i2] = newint + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_1d_float32_axisNone(np.ndarray[np.float32_t, ndim=1] a, + double old, double new): + "replace (inplace) specified elements of 1d array of dtype=float32." 
+ cdef np.float32_t ai + cdef Py_ssize_t i0 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + if old==old: + for i0 in range(n0): + ai = a[i0] + if ai == old: + a[i0] = new + else: + for i0 in range(n0): + ai = a[i0] + if ai != ai: + a[i0] = new + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_1d_float64_axisNone(np.ndarray[np.float64_t, ndim=1] a, + double old, double new): + "replace (inplace) specified elements of 1d array of dtype=float64." + cdef np.float64_t ai + cdef Py_ssize_t i0 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + if old==old: + for i0 in range(n0): + ai = a[i0] + if ai == old: + a[i0] = new + else: + for i0 in range(n0): + ai = a[i0] + if ai != ai: + a[i0] = new + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_2d_float32_axisNone(np.ndarray[np.float32_t, ndim=2] a, + double old, double new): + "replace (inplace) specified elements of 2d array of dtype=float32." + cdef np.float32_t ai + cdef Py_ssize_t i0, i1 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + if old==old: + for i0 in range(n0): + for i1 in range(n1): + ai = a[i0, i1] + if ai == old: + a[i0, i1] = new + else: + for i0 in range(n0): + for i1 in range(n1): + ai = a[i0, i1] + if ai != ai: + a[i0, i1] = new + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_2d_float64_axisNone(np.ndarray[np.float64_t, ndim=2] a, + double old, double new): + "replace (inplace) specified elements of 2d array of dtype=float64." + cdef np.float64_t ai + cdef Py_ssize_t i0, i1 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + if old==old: + for i0 in range(n0): + for i1 in range(n1): + ai = a[i0, i1] + if ai == old: + a[i0, i1] = new + else: + for i0 in range(n0): + for i1 in range(n1): + ai = a[i0, i1] + if ai != ai: + a[i0, i1] = new + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_3d_float32_axisNone(np.ndarray[np.float32_t, ndim=3] a, + double old, double new): + "replace (inplace) specified elements of 3d array of dtype=float32." + cdef np.float32_t ai + cdef Py_ssize_t i0, i1, i2 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + cdef Py_ssize_t n2 = dim[2] + if old==old: + for i0 in range(n0): + for i1 in range(n1): + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai == old: + a[i0, i1, i2] = new + else: + for i0 in range(n0): + for i1 in range(n1): + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai != ai: + a[i0, i1, i2] = new + +@cython.boundscheck(False) +@cython.wraparound(False) +def replace_3d_float64_axisNone(np.ndarray[np.float64_t, ndim=3] a, + double old, double new): + "replace (inplace) specified elements of 3d array of dtype=float64." 
+ cdef np.float64_t ai + cdef Py_ssize_t i0, i1, i2 + cdef np.npy_intp *dim + dim = PyArray_DIMS(a) + cdef Py_ssize_t n0 = dim[0] + cdef Py_ssize_t n1 = dim[1] + cdef Py_ssize_t n2 = dim[2] + if old==old: + for i0 in range(n0): + for i1 in range(n1): + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai == old: + a[i0, i1, i2] = new + else: + for i0 in range(n0): + for i1 in range(n1): + for i2 in range(n2): + ai = a[i0, i1, i2] + if ai != ai: + a[i0, i1, i2] = new + +cdef dict replace_dict = {} +replace_dict[(1, NPY_int32, 0)] = replace_1d_int32_axisNone +replace_dict[(1, NPY_int32, None)] = replace_1d_int32_axisNone +replace_dict[(1, NPY_int64, 0)] = replace_1d_int64_axisNone +replace_dict[(1, NPY_int64, None)] = replace_1d_int64_axisNone +replace_dict[(2, NPY_int32, None)] = replace_2d_int32_axisNone +replace_dict[(2, NPY_int64, None)] = replace_2d_int64_axisNone +replace_dict[(3, NPY_int32, None)] = replace_3d_int32_axisNone +replace_dict[(3, NPY_int64, None)] = replace_3d_int64_axisNone +replace_dict[(1, NPY_float32, 0)] = replace_1d_float32_axisNone +replace_dict[(1, NPY_float32, None)] = replace_1d_float32_axisNone +replace_dict[(1, NPY_float64, 0)] = replace_1d_float64_axisNone +replace_dict[(1, NPY_float64, None)] = replace_1d_float64_axisNone +replace_dict[(2, NPY_float32, None)] = replace_2d_float32_axisNone +replace_dict[(2, NPY_float64, None)] = replace_2d_float64_axisNone +replace_dict[(3, NPY_float32, None)] = replace_3d_float32_axisNone +replace_dict[(3, NPY_float64, None)] = replace_3d_float64_axisNone + +def replace_slow_axis0(arr, old, new): + "Unaccelerated (slow) replace along axis 0." + return slow_replace(arr, old, new) + +def replace_slow_axis1(arr, old, new): + "Unaccelerated (slow) replace along axis 1." + return slow_replace(arr, old, new) + +def replace_slow_axis2(arr, old, new): + "Unaccelerated (slow) replace along axis 2." + return slow_replace(arr, old, new) + +def replace_slow_axis3(arr, old, new): + "Unaccelerated (slow) replace along axis 3." + return slow_replace(arr, old, new) + +def replace_slow_axis4(arr, old, new): + "Unaccelerated (slow) replace along axis 4." + return slow_replace(arr, old, new) + +def replace_slow_axis5(arr, old, new): + "Unaccelerated (slow) replace along axis 5." + return slow_replace(arr, old, new) + +def replace_slow_axis6(arr, old, new): + "Unaccelerated (slow) replace along axis 6." + return slow_replace(arr, old, new) + +def replace_slow_axis7(arr, old, new): + "Unaccelerated (slow) replace along axis 7." + return slow_replace(arr, old, new) + +def replace_slow_axis8(arr, old, new): + "Unaccelerated (slow) replace along axis 8." + return slow_replace(arr, old, new) + +def replace_slow_axis9(arr, old, new): + "Unaccelerated (slow) replace along axis 9." + return slow_replace(arr, old, new) + +def replace_slow_axis10(arr, old, new): + "Unaccelerated (slow) replace along axis 10." + return slow_replace(arr, old, new) + +def replace_slow_axis11(arr, old, new): + "Unaccelerated (slow) replace along axis 11." + return slow_replace(arr, old, new) + +def replace_slow_axis12(arr, old, new): + "Unaccelerated (slow) replace along axis 12." + return slow_replace(arr, old, new) + +def replace_slow_axis13(arr, old, new): + "Unaccelerated (slow) replace along axis 13." + return slow_replace(arr, old, new) + +def replace_slow_axis14(arr, old, new): + "Unaccelerated (slow) replace along axis 14." + return slow_replace(arr, old, new) + +def replace_slow_axis15(arr, old, new): + "Unaccelerated (slow) replace along axis 15." 
+ return slow_replace(arr, old, new) + +def replace_slow_axis16(arr, old, new): + "Unaccelerated (slow) replace along axis 16." + return slow_replace(arr, old, new) + +def replace_slow_axis17(arr, old, new): + "Unaccelerated (slow) replace along axis 17." + return slow_replace(arr, old, new) + +def replace_slow_axis18(arr, old, new): + "Unaccelerated (slow) replace along axis 18." + return slow_replace(arr, old, new) + +def replace_slow_axis19(arr, old, new): + "Unaccelerated (slow) replace along axis 19." + return slow_replace(arr, old, new) + +def replace_slow_axis20(arr, old, new): + "Unaccelerated (slow) replace along axis 20." + return slow_replace(arr, old, new) + +def replace_slow_axis21(arr, old, new): + "Unaccelerated (slow) replace along axis 21." + return slow_replace(arr, old, new) + +def replace_slow_axis22(arr, old, new): + "Unaccelerated (slow) replace along axis 22." + return slow_replace(arr, old, new) + +def replace_slow_axis23(arr, old, new): + "Unaccelerated (slow) replace along axis 23." + return slow_replace(arr, old, new) + +def replace_slow_axis24(arr, old, new): + "Unaccelerated (slow) replace along axis 24." + return slow_replace(arr, old, new) + +def replace_slow_axis25(arr, old, new): + "Unaccelerated (slow) replace along axis 25." + return slow_replace(arr, old, new) + +def replace_slow_axis26(arr, old, new): + "Unaccelerated (slow) replace along axis 26." + return slow_replace(arr, old, new) + +def replace_slow_axis27(arr, old, new): + "Unaccelerated (slow) replace along axis 27." + return slow_replace(arr, old, new) + +def replace_slow_axis28(arr, old, new): + "Unaccelerated (slow) replace along axis 28." + return slow_replace(arr, old, new) + +def replace_slow_axis29(arr, old, new): + "Unaccelerated (slow) replace along axis 29." + return slow_replace(arr, old, new) + +def replace_slow_axis30(arr, old, new): + "Unaccelerated (slow) replace along axis 30." + return slow_replace(arr, old, new) + +def replace_slow_axis31(arr, old, new): + "Unaccelerated (slow) replace along axis 31." + return slow_replace(arr, old, new) + +def replace_slow_axis32(arr, old, new): + "Unaccelerated (slow) replace along axis 32." + return slow_replace(arr, old, new) + +def replace_slow_axisNone(arr, old, new): + "Unaccelerated (slow) replace along axis None." 
+ return slow_replace(arr, old, new) + + +cdef dict replace_slow_dict = {} +replace_slow_dict[0] = replace_slow_axis0 +replace_slow_dict[1] = replace_slow_axis1 +replace_slow_dict[2] = replace_slow_axis2 +replace_slow_dict[3] = replace_slow_axis3 +replace_slow_dict[4] = replace_slow_axis4 +replace_slow_dict[5] = replace_slow_axis5 +replace_slow_dict[6] = replace_slow_axis6 +replace_slow_dict[7] = replace_slow_axis7 +replace_slow_dict[8] = replace_slow_axis8 +replace_slow_dict[9] = replace_slow_axis9 +replace_slow_dict[10] = replace_slow_axis10 +replace_slow_dict[11] = replace_slow_axis11 +replace_slow_dict[12] = replace_slow_axis12 +replace_slow_dict[13] = replace_slow_axis13 +replace_slow_dict[14] = replace_slow_axis14 +replace_slow_dict[15] = replace_slow_axis15 +replace_slow_dict[16] = replace_slow_axis16 +replace_slow_dict[17] = replace_slow_axis17 +replace_slow_dict[18] = replace_slow_axis18 +replace_slow_dict[19] = replace_slow_axis19 +replace_slow_dict[20] = replace_slow_axis20 +replace_slow_dict[21] = replace_slow_axis21 +replace_slow_dict[22] = replace_slow_axis22 +replace_slow_dict[23] = replace_slow_axis23 +replace_slow_dict[24] = replace_slow_axis24 +replace_slow_dict[25] = replace_slow_axis25 +replace_slow_dict[26] = replace_slow_axis26 +replace_slow_dict[27] = replace_slow_axis27 +replace_slow_dict[28] = replace_slow_axis28 +replace_slow_dict[29] = replace_slow_axis29 +replace_slow_dict[30] = replace_slow_axis30 +replace_slow_dict[31] = replace_slow_axis31 +replace_slow_dict[32] = replace_slow_axis32 +replace_slow_dict[None] = replace_slow_axisNone \ No newline at end of file diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 55c0b3c5a92c7..50eb07d62b31d 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -2,6 +2,10 @@ cimport numpy as np cimport cython from numpy cimport * +from numpy cimport NPY_INT32 as NPY_int32 +from numpy cimport NPY_INT64 as NPY_int64 +from numpy cimport NPY_FLOAT32 as NPY_float32 +from numpy cimport NPY_FLOAT64 as NPY_float64 from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, PyDict_Contains, PyDict_Keys, @@ -665,6 +669,36 @@ def value_count_int64(ndarray[int64_t] values): return result_keys, result_counts +def array_isnull(arr): + if np.isscalar(arr) or arr is None: + return checknull(arr) + if arr.dtype.kind in ('O', 'S'): + # Working around NumPy ticket 1542 + shape = arr.shape + result = np.empty(shape, dtype=bool) + vec = isnullobj(arr.ravel()) + result[:] = vec.reshape(shape) + elif arr.dtype == np.datetime64: + # this is the NaT pattern + result = np.array(arr).view('i8') == NaT + else: + result = -np.isfinite(arr) + return result + +def slow_replace(arr, old, new): + "Slow replace (inplace) used for unaccelerated ndim/dtype combinations." 
+ if type(arr) is not np.ndarray: + raise TypeError("`arr` must be a numpy array.") + if not issubclass(arr.dtype.type, np.inexact): + if int(old) != old: + raise ValueError("Cannot safely cast `old` to int.") + if int(new) != new: + raise ValueError("Cannot safely cast `new` to int.") + if array_isnull(old): + mask = array_isnull(arr) + else: + mask = arr == old + np.putmask(arr, mask, new) include "hashtable.pyx" include "datetime.pyx" diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 9bfe029b1bce1..25f2e1a7774bb 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3509,6 +3509,24 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(axis=1) assert_frame_equal(result, expected) + def test_replace(self): + pass + + def test_replace_inplace(self): + pass + + def test_replace_method(self): + pass + + def test_replace_col_dict(self): + pass + + def test_replace_axis(self): + pass + + def test_replace_limit(self): + pass + def test_truncate(self): offset = datetools.bday From 45773c93c8da501a14faca0dd83a882974b49423 Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 10 May 2012 17:31:55 -0400 Subject: [PATCH 086/114] ENH: finishing up DataFrame.replace need to revisit --- pandas/core/frame.py | 11 +++ pandas/core/internals.py | 82 ++++++++++++++++++-- pandas/core/series.py | 10 ++- pandas/src/codegen_replace.py | 2 +- pandas/src/replace.pyx | 2 +- pandas/src/tseries.pyx | 74 +++++++++++++++--- pandas/tests/test_frame.py | 136 +++++++++++++++++++++++++--------- pandas/tests/test_panel.py | 37 --------- pandas/tests/test_series.py | 67 +++++++++++++++++ vb_suite/replace.py | 24 ++++++ 10 files changed, 353 insertions(+), 92 deletions(-) create mode 100644 vb_suite/replace.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b699de61e5e3b..b8e9c3a12a382 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2742,6 +2742,10 @@ def replace(self, to_replace, value=None, method='pad', axis=0, method = com._clean_fill_method(method) if isinstance(to_replace, dict): + if axis == 1: + return self.T.replace(to_replace, method=method, + limit=limit).T + rs = self if inplace else self.copy() for k, v in to_replace.iteritems(): if k in rs: @@ -2757,6 +2761,13 @@ def replace(self, to_replace, value=None, method='pad', axis=0, missing=to_replace) new_blocks.append(newb) new_data = BlockManager(new_blocks, self._data.axes) + + if inplace: + self._data = new_data + return self + else: + return self._constructor(new_data) + else: # Float type values if len(self.columns) == 0: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1bd644e9d5a8e..574ed8dfc4fdc 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -208,11 +208,41 @@ def split_block_at(self, item): return left_block, right_block def fillna(self, value, inplace=False): - return self.replace(np.nan, value, inplace) + new_values = self.values if inplace else self.values.copy() + + mask = com.isnull(new_values) + np.putmask(new_values, mask, value) + + if inplace: + return self + else: + return make_block(new_values, self.items, self.ref_items) + + def _can_hold_element(self, value): + raise NotImplementedError() + + def _try_cast(self, value): + raise NotImplementedError() def replace(self, to_replace, value, inplace=False): new_values = self.values if inplace else self.values.copy() - lib.replace(new_values, to_replace, value) + if self._can_hold_element(value): + value = self._try_cast(value) + + if np.isscalar(to_replace): + if 
self._can_hold_element(to_replace): + to_replace = self._try_cast(to_replace) + lib.replace(new_values, to_replace, value) + else: + try: + to_replace = np.array(to_replace, dtype=self.dtype) + lib.replace(new_values, to_replace, value) + except: + to_replace = np.array(to_replace, dtype=object) + for r in to_replace: + if self._can_hold_element(r): + r = self._try_cast(r) + lib.replace(new_values, r, value) if inplace: return self else: @@ -230,7 +260,7 @@ def interpolate(self, method='pad', axis=0, inplace=False, if missing is None: mask = None else: # todo create faster fill func without masking - mask = _mask_missing(values, missing) + mask = _mask_missing(transf(values), missing) if method == 'pad': com.pad_2d(transf(values), limit=limit, mask=mask) @@ -247,10 +277,14 @@ def take(self, indexer, axis=1, fill_value=np.nan): return make_block(new_values, self.items, self.ref_items) def _mask_missing(array, missing_values): - missing_values = np.array(list(missing_values), dtype=object) + if np.isscalar(missing_values): + missing_values = [missing_values] + + missing_values = np.array(missing_values, dtype=object) if com.isnull(missing_values).any(): mask = com.isnull(array) missing_values = missing_values[com.notnull(missing_values)] + for v in missing_values: if mask is None: mask = array == missing_values @@ -264,6 +298,15 @@ def _mask_missing(array, missing_values): class FloatBlock(Block): _can_hold_na = True + def _can_hold_element(self, element): + return isinstance(element, (float, int)) + + def _try_cast(self, element): + try: + return float(element) + except: + return element + def should_store(self, value): # when inserting a column should not coerce integers to floats # unnecessarily @@ -278,18 +321,42 @@ def should_store(self, value): class IntBlock(Block): _can_hold_na = False + def _can_hold_element(self, element): + return isinstance(element, int) + + def _try_cast(self, element): + try: + return int(element) + except: + return element + def should_store(self, value): return issubclass(value.dtype.type, np.integer) class BoolBlock(Block): _can_hold_na = False + def _can_hold_element(self, element): + return isinstance(element, (int, bool)) + + def _try_cast(self, element): + try: + return bool(element) + except: + return element + def should_store(self, value): return issubclass(value.dtype.type, np.bool_) class ObjectBlock(Block): _can_hold_na = True + def _can_hold_element(self, element): + return True + + def _try_cast(self, element): + return element + def should_store(self, value): return not issubclass(value.dtype.type, (np.integer, np.floating, np.complexfloating, @@ -968,7 +1035,12 @@ def add_suffix(self, suffix): return self.rename_items(f) def fillna(self, value, inplace=False): - return self.replace(np.nan, value, inplace) + new_blocks = [b.fillna(value, inplace=inplace) + if b._can_hold_na else b + for b in self.blocks] + if inplace: + return self + return BlockManager(new_blocks, self.axes) def replace(self, to_replace, value, inplace=False): new_blocks = [b.replace(to_replace, value, inplace=inplace) diff --git a/pandas/core/series.py b/pandas/core/series.py index 06c5a9ca1eeae..7a539a1b91d17 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2140,10 +2140,9 @@ def fillna(self, value=None, method='pad', inplace=False, ------- filled : Series """ - mask = isnull(self.values) - if value is not None: result = self.copy() if not inplace else self + mask = isnull(self.values) np.putmask(result, mask, value) else: if method is None: # pragma: no 
cover @@ -2201,9 +2200,11 @@ def replace(self, to_replace, value=None, method='pad', inplace=False, replaced : Series """ result = self.copy() if not inplace else self - single_val = False def _rep_one(s, to_rep, v): # replace single value + if isinstance(to_rep, (list, np.ndarray)): + to_rep = lib.maybe_convert_objects(np.array(to_rep, + dtype=object)) lib.replace(s.values, to_rep, v) return s @@ -2216,6 +2217,9 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} rs = _rep_one(rs, sset, d) return rs + if np.isscalar(to_replace): + to_replace = [to_replace] + if isinstance(to_replace, dict): return _rep_dict(result, to_replace) diff --git a/pandas/src/codegen_replace.py b/pandas/src/codegen_replace.py index 12593d8d38bd3..46ac0242d96ba 100644 --- a/pandas/src/codegen_replace.py +++ b/pandas/src/codegen_replace.py @@ -144,7 +144,7 @@ def replace(arr, old, new): return func(arr, old, new) else: for o in old: - arr = func(arr, o, new) + func(arr, o, new) return arr def replace_selector(arr): diff --git a/pandas/src/replace.pyx b/pandas/src/replace.pyx index c785518e9ab83..4547d368059de 100644 --- a/pandas/src/replace.pyx +++ b/pandas/src/replace.pyx @@ -24,7 +24,7 @@ def replace(arr, old, new): return func(arr, old, new) else: for o in old: - arr = func(arr, o, new) + func(arr, o, new) return arr def replace_selector(arr): diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 50eb07d62b31d..03644d809b9e2 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -1,5 +1,6 @@ cimport numpy as np cimport cython +import numpy as np from numpy cimport * from numpy cimport NPY_INT32 as NPY_int32 @@ -7,6 +8,26 @@ from numpy cimport NPY_INT64 as NPY_int64 from numpy cimport NPY_FLOAT32 as NPY_float32 from numpy cimport NPY_FLOAT64 as NPY_float64 +int32 = np.dtype(np.int32) +int64 = np.dtype(np.int64) +float32 = np.dtype(np.float32) +float64 = np.dtype(np.float64) + +cdef np.int32_t MINint32 = np.iinfo(np.int32).min +cdef np.int64_t MINint64 = np.iinfo(np.int64).min +cdef np.float32_t MINfloat32 = np.NINF +cdef np.float64_t MINfloat64 = np.NINF + +cdef np.int32_t MAXint32 = np.iinfo(np.int32).max +cdef np.int64_t MAXint64 = np.iinfo(np.int64).max +cdef np.float32_t MAXfloat32 = np.inf +cdef np.float64_t MAXfloat64 = np.inf + + +cdef extern from "numpy/arrayobject.h": + cdef enum NPY_TYPES: + NPY_intp "NPY_INTP" + from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, PyDict_Contains, PyDict_Keys, Py_INCREF, PyTuple_SET_ITEM, @@ -15,10 +36,10 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, from cpython cimport PyFloat_Check cimport cpython -import numpy as np isnan = np.isnan cdef double NaN = np.NaN cdef double nan = NaN +cdef double NAN = nan from datetime import datetime as pydatetime @@ -671,7 +692,7 @@ def value_count_int64(ndarray[int64_t] values): def array_isnull(arr): if np.isscalar(arr) or arr is None: - return checknull(arr) + return _checknull(arr) if arr.dtype.kind in ('O', 'S'): # Working around NumPy ticket 1542 shape = arr.shape @@ -685,19 +706,50 @@ def array_isnull(arr): result = -np.isfinite(arr) return result +def typed_null_check(obj, arr): + if np.isscalar(arr) or arr is None: + return _checknull(obj) + if arr.dtype.kind in ('O', 'S'): + # Working around NumPy ticket 1542 + if np.isscalar(obj): + result = isnullobj(np.array([obj], dtype=object)) + else: + result = isnullobj(np.array(obj, dtype=object)) + elif arr.dtype == np.datetime64: + # this is the NaT pattern + result = obj == NaT + else: + result = -np.isfinite(obj) 
+ return result + def slow_replace(arr, old, new): "Slow replace (inplace) used for unaccelerated ndim/dtype combinations." - if type(arr) is not np.ndarray: + if not isinstance(arr, np.ndarray): raise TypeError("`arr` must be a numpy array.") - if not issubclass(arr.dtype.type, np.inexact): - if int(old) != old: - raise ValueError("Cannot safely cast `old` to int.") - if int(new) != new: - raise ValueError("Cannot safely cast `new` to int.") - if array_isnull(old): - mask = array_isnull(arr) + + if np.isscalar(old) or old is None: + if typed_null_check(old, arr): + mask = array_isnull(arr) + else: + if arr.dtype == np.datetime64: + mask = np.array(arr).view('i8') == old + else: + mask = arr == old else: - mask = arr == old + mask = None + old_null = typed_null_check(old, arr) + others = old[-old_null] + if len(others) > 1: + mask = ismember(arr, set(others)) + elif len(others) == 1: + if arr.dtype == np.datetime64: + mask = np.array(arr).view('i8') == others[0] + else: + mask = arr == others[0] + if old_null.any(): + null_mask = array_isnull(arr) + mask = null_mask if mask is None else (null_mask | mask) + np.putmask(arr, mask, new) include "hashtable.pyx" diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 25f2e1a7774bb..642da36ac598b 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3509,23 +3509,116 @@ def test_fillna_columns(self): expected = df.astype(float).fillna(axis=1) assert_frame_equal(result, expected) + def test_replace_inplace(self): + self.tsframe['A'][:5] = nan + self.tsframe['A'][-5:] = nan + + tsframe = self.tsframe.copy() + tsframe.replace(nan, 0, inplace=True) + assert_frame_equal(tsframe, self.tsframe.fillna(0)) + + tsframe = self.tsframe.copy() + tsframe.replace(nan, method='pad', inplace=True) + assert_frame_equal(tsframe, self.tsframe.fillna(method='pad')) + + # mixed type + self.mixed_frame['foo'][5:20] = nan + self.mixed_frame['A'][-10:] = nan + + result = self.mixed_frame.replace(np.nan, 0) + expected = self.mixed_frame.fillna(value=0) + assert_frame_equal(result, expected) + def test_replace(self): - pass + self.tsframe['A'][:5] = nan + self.tsframe['A'][-5:] = nan - def test_replace_inplace(self): - pass + zero_filled = self.tsframe.replace(nan, -1e8) + assert_frame_equal(zero_filled, self.tsframe.fillna(-1e8)) - def test_replace_method(self): - pass + assert_frame_equal(zero_filled.replace(-1e8, nan), self.tsframe) - def test_replace_col_dict(self): - pass + padded = self.tsframe.replace(nan, method='pad') + assert_frame_equal(padded, self.tsframe.fillna(method='pad')) + + # mixed type + self.mixed_frame['foo'][5:20] = nan + self.mixed_frame['A'][-10:] = nan + + result = self.mixed_frame.replace(np.nan, -1e8) + expected = self.mixed_frame.fillna(value=-1e8) + assert_frame_equal(result, expected) + assert_frame_equal(result.replace(-1e8, nan), self.mixed_frame) + + def test_replace_input_formats(self): + to_rep = {'A' : np.nan, 'B' : 0, 'C' : ''} + values = {'A' : 0, 'B' : -1, 'C' : 'missing'} + df = DataFrame({'A' : [np.nan, 0, np.inf], 'B' : [0, 2, 5], + 'C' : ['', 'asdf', 'fd']}) + filled = df.replace(to_rep, values) + expected = {} + for k, v in df.iteritems(): + expected[k] = v.replace(to_rep[k], values[k]) + assert_frame_equal(filled, DataFrame(expected)) + + values = {'A' : 0, 'B' : -1, 'C' : 'missing'} + df = DataFrame({'A' : [np.nan, 0, np.nan], 'B' : [0, 2, 5], + 'C' : ['', 'asdf', 'fd']}) + filled = df.replace(np.nan, values) + expected = {} + for k, v in df.iteritems(): + expected[k] = 
v.replace(np.nan, values[k]) + assert_frame_equal(filled, DataFrame(expected)) + + to_rep = [np.nan, 0, ''] + values = [-2, -1, 'missing'] + result = df.replace(to_rep, values) + expected = df.copy() + for i in range(len(to_rep)): + expected.replace(to_rep[i], values[i], inplace=True) + assert_frame_equal(result, expected) + + to_rep = [np.nan, 0, ''] + result = df.replace(to_rep, -1) + expected = df.copy() + for i in range(len(to_rep)): + expected.replace(to_rep[i], -1, inplace=True) + assert_frame_equal(result, expected) def test_replace_axis(self): - pass + self.tsframe['A'][:5] = nan + self.tsframe['A'][-5:] = nan + + zero_filled = self.tsframe.replace(nan, 0, axis=1) + assert_frame_equal(zero_filled, self.tsframe.fillna(0, axis=1)) + + padded = self.tsframe.replace(nan, method='pad', axis=1) + assert_frame_equal(padded, self.tsframe.fillna(method='pad', axis=1)) + + # mixed type + self.mixed_frame['foo'][5:20] = nan + self.mixed_frame['A'][-10:] = nan + + result = self.mixed_frame.replace(np.nan, -1e8, axis=1) + expected = self.mixed_frame.fillna(value=-1e8, axis=1) + assert_frame_equal(result, expected) def test_replace_limit(self): - pass + padded = self.tsframe.replace(nan, method='pad', limit=2) + assert_frame_equal(padded, self.tsframe.fillna(method='pad', + limit=2)) + + bfilled = self.tsframe.replace(nan, method='bfill', limit=2) + assert_frame_equal(padded, self.tsframe.fillna(method='bfill', + limit=2)) + + padded = self.tsframe.replace(nan, method='pad', axis=1, limit=2) + assert_frame_equal(padded, self.tsframe.fillna(method='pad', + axis=1, limit=2)) + + bfill = self.tsframe.replace(nan, method='bfill', axis=1, limit=2) + assert_frame_equal(padded, self.tsframe.fillna(method='bfill', + axis=1, limit=2)) def test_truncate(self): offset = datetools.bday @@ -5596,31 +5689,6 @@ def test_bool_raises_value_error_1069(self): df = DataFrame([1, 2, 3]) self.failUnlessRaises(ValueError, lambda: bool(df)) - def test_replace(self): - N = 100 - df = DataFrame(np.fabs(np.random.randn(len(N), 5)), - index=tm.makeDataIndex(N)) - df.ix[:5, 0] = np.nan - df[6:10, 1] = 'foo' - df[20:30, 2] = 'bar' - - rs = df.replace([np.nan, 'foo', 'bar'], -1) - self.assert_((rs.ix[:5, 0] == -1).all()) - self.assert_((rs.ix[6:10, 1] == -1).all()) - self.assert_((rs.ix[20:30, 2] == -1).all()) - self.assert_((df >= 0).all()) - - rs = df.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) - self.assert_((rs.ix[:5, 0] == -1).all()) - self.assert_((rs.ix[6:10, 1] == -2).all()) - self.assert_((rs.ix[20:30, 2] == -3).all()) - self.assert_((df >= 0).all()) - - df.replace([np.nan, 'foo', 'bar'], -1, inplace=True) - self.assert_((df.ix[:5, 0] == -1).all()) - self.assert_((df.ix[6:10, 1] == -1).all()) - self.assert_((df.ix[20:30, 2] == -1).all()) - if __name__ == '__main__': # unittest.main() import nose diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 8a2652f751f68..e1441e9d7f4ff 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1294,43 +1294,6 @@ def test_pivot(self): # corner case, empty df = pivot(np.array([]), np.array([]), np.array([])) - def test_replace(self): - N = 100 - df1 = DataFrame(np.fabs(np.random.randn(len(N), 5)), - index=tm.makeDataIndex(N)) - df1.ix[:5, 0] = np.nan - df1[6:10, 1] = 'foo' - df1[20:30, 2] = 'bar' - - df2 = DataFrame(np.fabs(np.random.randn(len(N), 5)), - index=tm.makeDataIndex(N)) - df2.ix[:5, 0] = 'bar' - df2[6:10, 1] = np.nan - df2[20:30, 2] = 'foo' - - panel = Panel({'x' : df1, 'y' : df2}) - rs = panel.replace([np.nan, 'foo', 'bar'], 
-1) - self.assert_((rs.ix[:, :5, 0] == -1).all()) - self.assert_((rs.ix[:, 6:10, 1] == -1).all()) - self.assert_((rs.ix[:, 20:30, 2] == -1).all()) - self.assert_((panel >= 0).all()) - - rs = panel.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) - self.assert_((rs.ix[0, :5, 0] == -1).all()) - self.assert_((rs.ix[0, 6:10, 1] == -2).all()) - self.assert_((rs.ix[0, 20:30, 2] == -3).all()) - - self.assert_((rs.ix[1, :5, 0] == -3).all()) - self.assert_((rs.ix[1, 6:10, 1] == -1).all()) - self.assert_((rs.ix[1, 20:30, 2] == -2).all()) - - self.assert_((panel >= 0).all()) - - panel.replace([np.nan, 'foo', 'bar'], -1, inplace=True) - self.assert_((panel.ix[:5, 0] == -1).all()) - self.assert_((panel.ix[6:10, 1] == -1).all()) - self.assert_((panel.ix[20:30, 2] == -1).all()) - def test_monotonic(): pos = np.array([1, 2, 3, 5]) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index c52eb06b698f8..6ea5b7d94ae09 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -15,6 +15,7 @@ bdate_range, date_range) from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex +import pandas._tseries as lib import pandas.core.datetools as datetools import pandas.core.nanops as nanops @@ -2526,6 +2527,72 @@ def test_fillna_inplace(self): expected = x.fillna(value=0) assert_series_equal(y2, expected) + def test_replace(self): + N = 100 + ser = Series(np.random.randn(N)) + ser[0:4] = np.nan + ser[6:10] = 0 + + # replace list with a single value + rs = ser.replace([np.nan], -1, inplace=True) + exp = ser.fillna(-1) + assert_series_equal(rs, exp) + + rs = ser.replace(0., np.nan) + ser[ser == 0.] = np.nan + assert_series_equal(rs, ser) + + ser = Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), + dtype=object) + ser[:5] = np.nan + ser[6:10] = 'foo' + ser[20:30] = 'bar' + + # replace list with a single value + rs = ser.replace([np.nan, 'foo', 'bar'], -1) + + self.assert_((rs[:5] == -1).all()) + self.assert_((rs[6:10] == -1).all()) + self.assert_((rs[20:30] == -1).all()) + self.assert_((isnull(ser[:5])).all()) + + # replace with different values + rs = ser.replace({np.nan : -1, 'foo' : -2, 'bar' : -3}) + + self.assert_((rs[:5] == -1).all()) + self.assert_((rs[6:10] == -2).all()) + self.assert_((rs[20:30] == -3).all()) + self.assert_((isnull(ser[:5])).all()) + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) + assert_series_equal(rs, rs2) + + # replace with forward fill not considering np.nan missing + s2 = ser.copy() + s2[5] = np.nan + rs3 = s2.replace(['foo', 'bar']) + self.assert_(isnull(rs3[6])) + + # replace with back fill considering np.nan as missing + rs4 = ser.replace([np.nan, 'foo', 'bar'], method='bfill') + assert_almost_equal(rs4[4], ser[5]) + + # replace inplace + ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) + self.assert_((ser[:5] == -1).all()) + self.assert_((ser[6:10] == -1).all()) + self.assert_((ser[20:30] == -1).all()) + + ser = Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) + assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + filled = ser.copy() + filled[4] = 0 + assert_series_equal(ser.replace(np.inf, 0), filled) + + ser = Series(self.ts.index) + assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime(2009, 11, 30), diff --git a/vb_suite/replace.py b/vb_suite/replace.py new file mode 100644 index 0000000000000..bc5397df2c66d --- /dev/null +++ 
b/vb_suite/replace.py @@ -0,0 +1,24 @@ +from vbench.api import Benchmark + +common_setup = """from pandas_vb_common import * +from datetime import timedelta +import pandas._tseries as lib +N = 1000000 + +try: + rng = date_range('1/1/2000', periods=N, freq='min') +except NameError: + rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute()) + date_range = DateRange + +ts = Series(np.random.randn(N), index=rng) + +def replace_slow(ser, old, new): + lib.slow_replace(ser.values, old, new) + return ser +""" + +replace_fillna = Benchmark('ts.fillna(0., inplace=True)', common_setup) +replace_replacena = Benchmark('ts.replace(np.nan, 0., inplace=True)', + common_setup) +replace_putmask = Benchmark('replace_slow(ts, np.nan, 0.)', common_setup) From 2f5319de4ac4ae8906395b49e07ac8cf94639dc8 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 14:37:41 -0400 Subject: [PATCH 087/114] removed bottleneck calls from replace --- pandas/core/common.py | 31 ++ pandas/core/frame.py | 81 ++--- pandas/core/internals.py | 10 +- pandas/core/nanops.py | 1 - pandas/core/series.py | 6 +- pandas/src/codegen_replace.py | 187 ----------- pandas/src/codegen_template.py | 408 ----------------------- pandas/src/replace.pyx | 575 --------------------------------- pandas/src/tseries.pyx | 62 ---- 9 files changed, 84 insertions(+), 1277 deletions(-) delete mode 100644 pandas/src/codegen_replace.py delete mode 100644 pandas/src/codegen_template.py delete mode 100644 pandas/src/replace.pyx diff --git a/pandas/core/common.py b/pandas/core/common.py index cb1e457fa1c0a..6e92e55f203de 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -97,6 +97,37 @@ def notnull(obj): return not res return -res +def mask_missing(arr, values_to_mask): + """ + Return a masking array of same size/shape as arr + with entries equaling any member of values_to_mask set to True + """ + if np.isscalar(values_to_mask): + values_to_mask = [values_to_mask] + + try: + values_to_mask = np.array(values_to_mask, dtype=arr.dtype) + except Exception: + values_to_mask = np.array(values_to_mask, dtype=object) + + na_mask = isnull(values_to_mask) + nonna = values_to_mask[-na_mask] + + mask = None + for x in nonna: + if mask is None: + mask = arr == x + else: + mask = mask | (arr == x) + + if na_mask.any(): + if mask is None: + mask = isnull(arr) + else: + mask = mask | isnull(arr) + + return mask + def _pickle_array(arr): arr = arr.view(np.ndarray) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b8e9c3a12a382..333f91f94a67d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2736,44 +2736,14 @@ def replace(self, to_replace, value=None, method='pad', axis=0, self._consolidate_inplace() if value is None: - if self._is_mixed_type and axis == 1: - return self.T.replace(to_replace, method=method, limit=limit).T - - method = com._clean_fill_method(method) - - if isinstance(to_replace, dict): - if axis == 1: - return self.T.replace(to_replace, method=method, - limit=limit).T - - rs = self if inplace else self.copy() - for k, v in to_replace.iteritems(): - if k in rs: - rs[k].replace(v, method=method, limit=limit, - inplace=True) - return rs - - else: - new_blocks = [] - for block in self._data.blocks: - newb = block.interpolate(method, axis=axis, - limit=limit, inplace=inplace, - missing=to_replace) - new_blocks.append(newb) - new_data = BlockManager(new_blocks, self._data.axes) - - if inplace: - self._data = new_data - return self - else: - return self._constructor(new_data) - + return 
self._interpolate(to_replace, method, axis, inplace, limit) else: # Float type values if len(self.columns) == 0: return self if np.isscalar(to_replace): + if np.isscalar(value): # np.nan -> 0 new_data = self._data.replace(to_replace, value, inplace=inplace) @@ -2786,14 +2756,17 @@ def replace(self, to_replace, value=None, method='pad', axis=0, elif isinstance(value, dict): # np.nan -> {'A' : 0, 'B' : -1} return self._replace_dest_dict(to_replace, value, inplace) + elif isinstance(to_replace, dict): + if np.isscalar(value): # {'A' : np.nan, 'B' : ''} -> 0 return self._replace_src_dict(to_replace, value, inplace) + elif isinstance(value, dict): # {'A' : np.nan} -> {'A' : 0} return self._replace_both_dict(to_replace, value, inplace) - else: - raise ValueError('Fill value must be scalar or dict') - return rs + + raise ValueError('Fill value must be scalar or dict') + elif isinstance(to_replace, (list, np.ndarray)): # [np.nan, ''] -> [0, 'missing'] @@ -2810,14 +2783,48 @@ def replace(self, to_replace, value=None, method='pad', axis=0, else: # [np.nan, ''] -> 0 new_data = self._data.replace(to_replace, value, inplace=inplace) + if inplace: self._data = new_data return self else: return self._constructor(new_data) + + raise ValueError('Invalid to_replace type: %s' % type(to_replace)) + + def _interpolate(self, to_replace, method, axis, inplace, limit): + if self._is_mixed_type and axis == 1: + return self.T.replace(to_replace, method=method, limit=limit).T + + method = com._clean_fill_method(method) + + if isinstance(to_replace, dict): + if axis == 1: + return self.T.replace(to_replace, method=method, + limit=limit).T + + rs = self if inplace else self.copy() + for k, v in to_replace.iteritems(): + if k in rs: + rs[k].replace(v, method=method, limit=limit, + inplace=True) + return rs + + else: + new_blocks = [] + for block in self._data.blocks: + newb = block.interpolate(method, axis=axis, + limit=limit, inplace=inplace, + missing=to_replace) + new_blocks.append(newb) + new_data = BlockManager(new_blocks, self._data.axes) + + if inplace: + self._data = new_data + return self else: - raise ValueError('Invalid to_replace type: %s' % - type(to_replace)) + return self._constructor(new_data) + def _replace_dest_dict(self, to_replace, value, inplace): rs = self if inplace else self.copy() diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 574ed8dfc4fdc..cbd1ccfabdeb7 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -232,17 +232,21 @@ def replace(self, to_replace, value, inplace=False): if np.isscalar(to_replace): if self._can_hold_element(to_replace): to_replace = self._try_cast(to_replace) - lib.replace(new_values, to_replace, value) + np.putmask(new_values, com.mask_missing(new_values, to_replace), + value) else: try: to_replace = np.array(to_replace, dtype=self.dtype) - lib.replace(new_values, to_replace, value) + np.putmask(new_values, com.mask_missing(new_values, to_replace), + value) except: to_replace = np.array(to_replace, dtype=object) for r in to_replace: if self._can_hold_element(r): r = self._try_cast(r) - lib.replace(new_values, r, value) + np.putmask(new_values, com.mask_missing(new_values, to_replace), + value) + if inplace: return self else: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 1237d7314af29..8fb01d1a89e17 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -412,4 +412,3 @@ def unique1d(values): uniques = table.unique(com._ensure_object(values)) uniques = lib.list_to_object_array(uniques) return uniques 
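
As a side note, here is a minimal standalone sketch of the putmask-based replacement strategy this commit switches to; the helper name ``simple_mask_missing`` and the sample values are illustrative only, and the snippet assumes a recent NumPy rather than the pandas internals touched above, mirroring the ``com.mask_missing`` + ``np.putmask`` combination used in ``Block.replace``::

    import numpy as np

    def simple_mask_missing(arr, values_to_mask):
        # Boolean mask marking entries equal to any member of values_to_mask;
        # NaN members are matched via the self-inequality check (NaN != NaN).
        if np.isscalar(values_to_mask):
            values_to_mask = [values_to_mask]
        mask = np.zeros(arr.shape, dtype=bool)
        for v in values_to_mask:
            if v != v:                 # NaN sentinel
                mask |= (arr != arr)
            else:
                mask |= (arr == v)
        return mask

    values = np.array([1.0, np.nan, 3.0, 0.0, np.nan])
    # Same two-step pattern as Block.replace: build the mask, then overwrite.
    np.putmask(values, simple_mask_missing(values, [np.nan, 0.0]), -1.0)
    print(values)                      # [ 1. -1.  3. -1. -1.]
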
- diff --git a/pandas/core/series.py b/pandas/core/series.py index 7a539a1b91d17..7957954fa9130 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2202,10 +2202,8 @@ def replace(self, to_replace, value=None, method='pad', inplace=False, result = self.copy() if not inplace else self def _rep_one(s, to_rep, v): # replace single value - if isinstance(to_rep, (list, np.ndarray)): - to_rep = lib.maybe_convert_objects(np.array(to_rep, - dtype=object)) - lib.replace(s.values, to_rep, v) + mask = com.mask_missing(s.values, to_rep) + np.putmask(s.values, mask, v) return s def _rep_dict(rs, to_rep): # replace {[src] -> dest} diff --git a/pandas/src/codegen_replace.py b/pandas/src/codegen_replace.py deleted file mode 100644 index 46ac0242d96ba..0000000000000 --- a/pandas/src/codegen_replace.py +++ /dev/null @@ -1,187 +0,0 @@ -from copy import deepcopy -import numpy as np - -#------------------------------------------------------------------------ -# Replace : slightly adapted from bottleneck - -loop_template = 'for iINDEX%d in range(nINDEX%d):' -indent = ' ' -#replace_op = ('%sif mask[INDEXALL]:\n' -# '%s a[INDEXALL] = new%s') - -nonna_op = ('%sai = a[INDEXALL]\n' - '%sif ai == old:\n' - '%s a[INDEXALL] = new%s') -na_op = ('%sai = a[INDEXALL]\n' - '%sif ai != ai:\n' - '%s a[INDEXALL] = new%s') - -generic_top = """ -@cython.boundscheck(False) -@cython.wraparound(False) -def NAME_NDIMd_DTYPE_axisAXIS(np.ndarray[np.DTYPE_t, ndim=NDIM] a, - double old, double new): - "replace (inplace) specified elements of NDIMd array of dtype=DTYPE." - cdef np.DTYPE_t ai -""" - -int_check = """\ - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') -""" - -def float_loop(ndims=3, type_suffix=''): - loop = {} - for n in range(1, ndims + 1): - loop_str = indent + 'if old==old: \n' - for i in range(n): # for i in range: - loop_str += indent * (i + 2) + (loop_template % (i, i)) + '\n' - - dent = indent * (n + 2) - loop_str += nonna_op % (dent, dent, dent, type_suffix) - - loop_str += '\n' + indent + 'else:\n' - for i in range(n): # for i in range: - loop_str += indent * (i + 2) + (loop_template % (i, i)) + '\n' - - dent = indent * (n + 2) - loop_str += na_op % (dent, dent, dent, type_suffix) - - loop[n] = loop_str + '\n' - return loop - -def int_loop(ndims=3, type_suffix='int'): - loop = {} - for n in range(1, ndims + 1): - loop_str = indent + 'if old==old: \n' + int_check - for i in range(n): # for i in range: - loop_str += indent * (i + 2) + (loop_template % (i, i)) + '\n' - - dent = indent * (n + 2) - loop_str += nonna_op % (dent, dent, dent, type_suffix) - loop[n] = loop_str + '\n' - return loop - - -# float type functions -floats = {} -floats['dtypes'] = ['float32', 'float64'] -floats['axisNone'] = True -floats['force_output_dtype'] = False -floats['reuse_non_nan_func'] = False -floats['top'] = generic_top -floats['loop'] = float_loop() - -# int type functions -ints = deepcopy(floats) -ints['dtypes'] = ['int32', 'int64'] -ints['top'] = generic_top + """ - cdef np.DTYPE_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') -""" -ints['loop'] = int_loop() - -# Slow, unaccelerated ndim/dtype -------------------------------------------- -def replace(arr, old, new): - "Slow replace (inplace) used for unaccelerated ndim/dtype combinations." 
- if type(arr) is not np.ndarray: - raise TypeError("`arr` must be a numpy array.") - if not issubclass(arr.dtype.type, np.inexact): - if int(old) != old: - raise ValueError("Cannot safely cast `old` to int.") - if int(new) != new: - raise ValueError("Cannot safely cast `new` to int.") - if old != old: - mask = np.isnan(arr) - else: - mask = arr == old - np.putmask(arr, mask, new) - -slow = {} -slow['name'] = "replace" -slow['signature'] = "arr, old, new" -slow['func'] = "slow_replace(arr, old, new)" - -replace = {} -replace['name'] = 'replace' -replace['is_reducing_function'] = False -replace['cdef_output'] = False -replace['slow'] = slow -replace['templates'] = {} -replace['templates']['float_None'] = floats -replace['templates']['int_None'] = ints -replace['pyx_file'] = 'replace.pyx' - -replace['main'] = '''"replace auto-generated from template" - -def replace(arr, old, new): - """ - Replace (inplace) given scalar values of an array with new values. - - similar to putmask but faster - - Parameters - ---------- - arr : numpy.ndarray - The input array, which is also the output array since this functions - works inplace. - old : scalar - new : scalar - All masked elements in `arr` will be replaced by `new`. - - Returns - ------- - None, the operation is inplace. - """ - func = replace_selector(arr) - if np.isscalar(old): - return func(arr, old, new) - else: - for o in old: - func(arr, o, new) - return arr - -def replace_selector(arr): - """ - Return replace function and array that matches `arr`. - - Under the hood Bottleneck uses a separate replace() Cython function for - each combination of ndim and dtype. A lot of the overhead in bn.replace() - is inselecting the low level function to use. - - You can get rid of the overhead by doing all this before you, for example, - enter an inner loop, by using this function. - - Parameters - ---------- - arr : numpy.ndarray - Input array. - - Returns - ------- - func : function - The replace() function that matches the number of dimensions and dtype - of the input array. - """ - axis = None - if type(arr) is not np.ndarray: - raise TypeError("`arr` must be a numpy array.") - cdef int ndim = PyArray_NDIM(arr) - cdef int dtype = PyArray_TYPE(arr) - cdef tuple key = (ndim, dtype, axis) - try: - func = replace_dict[key] - except KeyError: - try: - func = replace_slow_dict[axis] - except KeyError: - tup = (str(ndim), str(arr.dtype), str(axis)) - raise TypeError("Unsupported ndim/dtype/axis (%s/%s/%s)." % tup) - return func -''' diff --git a/pandas/src/codegen_template.py b/pandas/src/codegen_template.py deleted file mode 100644 index a43d936e7cf74..0000000000000 --- a/pandas/src/codegen_template.py +++ /dev/null @@ -1,408 +0,0 @@ -"Copied from bottleneck: Turn templates into Cython pyx files." 
-import os.path - -def template(func): - "'Convert template dictionary `func` to a pyx file.'\n" - codes = [] - codes.append(func['main']) - select = Selector(func['name']) - for key in func['templates']: - f = func['templates'][key] - code = subtemplate(name=func['name'], - top=f['top'], - loop=f['loop'], - axisNone=f['axisNone'], - dtypes=f['dtypes'], - force_output_dtype=f['force_output_dtype'], - reuse_non_nan_func=f['reuse_non_nan_func'], - is_reducing_function=func['is_reducing_function'], - cdef_output=func['cdef_output'], - select=select) - codes.append(code) - codes.append('\n' + str(select)) - if 'slow' in func: - if func['slow'] is not None: - slow = func['slow'] - code1 = slow_selector(slow['name']) - code2 = slow_functions(slow['name'], - slow['signature'], - slow['func']) - codes.append(code2) - codes.append(code1) - modpath = os.path.dirname(__file__) - fid = open(os.path.join(modpath, func['pyx_file']), 'w') - fid.write(''.join(codes)) - fid.close() - -def subtemplate(name, top, loop, axisNone, dtypes, force_output_dtype, - reuse_non_nan_func, is_reducing_function, cdef_output, select): - "Assemble template" - ndims = sorted(loop.keys()) - funcs = [] - for ndim in ndims: - if axisNone: - axes = [None] - else: - axes = list(range(ndim)) - for dtype in dtypes: - for axis in axes: - - if reuse_non_nan_func: - - select.append(ndim, dtype, axis, True) - - else: - - # Code template - func = top - - # loop - if force_output_dtype is not False: - ydtype = force_output_dtype - else: - ydtype = dtype - func += loop_cdef(ndim, ydtype, axis, is_reducing_function, - cdef_output) - func += looper(loop[ndim], ndim, axis) - - # name, ndim, dtype, axis - func = func.replace('NAME', name) - func = func.replace('NDIM', str(ndim)) - func = func.replace('DTYPE', dtype) - func = func.replace('AXIS', str(axis)) - - funcs.append(func) - select.append(ndim, dtype, axis) - - return ''.join(funcs) - -def looper(loop, ndim, axis): - """ - Given loop template, expand index markers for given `ndim` and `axis`. - - Parameters - ---------- - loop : str - Code of loop where the following template markers will be expanded - (example given is for 3d input, similarly for other nd): - - ================= ================================================= - INDEXALL Replace with i0, i1, i2 - INDEXPOP If axis=1, e.g., replace with i0, i2 - INDEXN If N=1, e.g., replace with 1 - INDEXREPLACE|exp| If exp = 'k - window' and axis=1, e.g., replace - with i0, k - window, i2 - NREPLACE|exp| If exp = 'n - window' and axis=1, e.g., replace - with n0, n - window, n2 - ================= ================================================= - ndim : int - Number of dimensions in the loop. - axis : {int, None} - Axis over which the loop is evaluated. - - Returns - ------- - code : str - Code for the loop with templated index markers expanded. - - Examples - -------- - Make a 3d loop template: - - >>> loop = ''' - .... for iINDEX0 in range(nINDEX0): - .... for iINDEX1 in range(nINDEX1): - .... amin = MAXDTYPE - .... for iINDEX2 in range(nINDEX2): - .... ai = a[INDEXALL] - .... if ai <= amin: - .... amin = ai - .... y[INDEXPOP] = amin - .... 
''' - - Import the looper function: - - >>> from bottleneck.src.template.template import looper - - Make a loop over axis=0: - - >>> print(looper(loop, ndim=3, axis=0)) - for i1 in range(n1): - for i2 in range(n2): - amin = MAXDTYPE - for i0 in range(n0): - ai = a[i0, i1, i2] - if ai <= amin: - amin = ai - y[i1, i2] = amin - - Make a loop over axis=1: - - >>> print(looper(loop, ndim=3, axis=1)) - for i0 in range(n0): - for i2 in range(n2): - amin = MAXDTYPE - for i1 in range(n1): - ai = a[i0, i1, i2] - if ai <= amin: - amin = ai - y[i0, i2] = amin - - Make a loop over axis=2: - - >>> print(looper(loop, ndim=3, axis=2)) - for i0 in range(n0): - for i1 in range(n1): - amin = MAXDTYPE - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai <= amin: - amin = ai - y[i0, i1] = amin - - """ - - if ndim < 1: - raise ValueError("ndim(=%d) must be and integer greater than 0" % ndim) - if axis is not None: - if axis < 0: - raise ValueError("`axis` must be a non-negative integer or None") - elif axis >= ndim: - raise ValueError("`axis` must be less then `ndim`") - - # INDEXALL - INDEXALL = ', '.join('i' + str(i) for i in range(ndim)) - code = loop.replace('INDEXALL', INDEXALL) - - # INDEXPOP - idx = list(range(ndim)) - if axis is not None: - idx.pop(axis) - INDEXPOP = ', '.join(['i' + str(i) for i in idx]) - code = code.replace('INDEXPOP', INDEXPOP) - - # INDEXN - idx = list(range(ndim)) - if axis is not None: - idxpop = idx.pop(axis) - idx.append(idxpop) - for i, j in enumerate(idx): - code = code.replace('INDEX%d' % i, '%d' % j) - - # INDEXREPLACE|x| - mark = 'INDEXREPLACE|' - nreplace = code.count(mark) - if (nreplace > 0) and (axis is None): - raise ValueError("`INDEXREPLACE` cannot be used when axis is None.") - while mark in code: - idx0 = code.index(mark) - idx1 = idx0 + len(mark) - idx2 = idx1 + code[idx1:].index('|') - if (idx0 >= idx1) or (idx1 >= idx2): - raise RuntimeError("Parsing error or poorly formatted input.") - replacement = code[idx1:idx2] - idx = ['i' + str(i) for i in range(ndim)] - idx[axis] = replacement - idx = ', '.join(idx) - code = code[:idx0] + idx + code[idx2+1:] - - # NREPLACE|x| - mark = 'NREPLACE|' - nreplace = code.count(mark) - # TODO: reuse while loop above, only difference is 'i' --> 'n' - while mark in code: - idx0 = code.index(mark) - idx1 = idx0 + len(mark) - idx2 = idx1 + code[idx1:].index('|') - if (idx0 >= idx1) or (idx1 >= idx2): - raise RuntimeError("Parsing error or poorly formatted input.") - replacement = code[idx1:idx2] - idx = ['n' + str(i) for i in range(ndim)] - idx[axis] = replacement - idx = ', '.join(idx) - code = code[:idx0] + idx + code[idx2+1:] - - return code - -def loop_cdef(ndim, dtype, axis, is_reducing_function, cdef_output=True): - """ - String of code that initializes variables needed in a for loop. - - The output string contains code for: index array counters, one for each - dimension (cdef Py_size_t i0, i1, i2, ....); the length along each - dimension of the input array, `a` (cdef Py_ssize_t n0 = a.shape[0],...); - the initialized, empty output array, `y`. - - Parameters - ---------- - ndim = int - Number of dimensions. - dtype : str - The data type of the output. Used for initilizing the empty output - array, `y`. - is_reducing_function : bool - If True then remove the dimension given by `axis` when initializing - the output array, `y`. - cdef_output : bool, optional - If False then only initialize indices (i) and shapes (n). If True - (default) then also intialized output array `y`. 
- - Returns - ------- - cdefs : str - String of code to use to initialize variables needed for loop. - - Examples - -------- - Define parameters: - - >>> ndim = 3 - >>> dtype = 'float64' - >>> axis = 1 - >>> is_reducing_function = True - - Import loop_cdef: - - >>> from bottleneck.src.template.template import loop_cdef - - Make loop initialization code: - - >>> print(loop_cdef(ndim, dtype, axis, is_reducing_function)) - cdef Py_ssize_t i0, i1, i2 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - Py_ssize_t n0 = dim[0] - Py_ssize_t n1 = dim[1] - Py_ssize_t n2 = dim[2] - cdef np.npy_intp *dims = [n0, n2] - cdef np.ndarray[np.float64_t, ndim=2] y = PyArray_EMPTY(2, dims, - NPY_float64, 0) - - Repeat, but this time make the output non-reducing: - - >>> is_reducing_function = False - >>> print(loop_cdef(ndim, dtype, axis, is_reducing_function)) - cdef Py_ssize_t i0, i1, i2 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - Py_ssize_t n0 = dim[0] - Py_ssize_t n1 = dim[1] - Py_ssize_t n2 = dim[2] - cdef np.npy_intp *dims = [n0, n1, n2] - cdef np.ndarray[np.float64_t, ndim=3] y = PyArray_EMPTY(3, dims, - NPY_float64, 0) - - """ - - if ndim < 1: - raise ValueError("ndim(=%d) must be and integer greater than 0" % ndim) - if axis is not None: - if axis < 0: - raise ValueError("`axis` must be a non-negative integer or None") - elif axis >= ndim: - raise ValueError("`axis` must be less then `ndim`") - - tab = ' ' - cdefs = [] - - # cdef loop indices - idx = ', '.join('i'+str(i) for i in range(ndim)) - cdefs.append(tab + 'cdef Py_ssize_t ' + idx) - - # Length along each dimension - cdefs.append(tab + "cdef np.npy_intp *dim") - cdefs.append(tab + "dim = PyArray_DIMS(a)") - for dim in range(ndim): - cdefs.append(tab + "cdef Py_ssize_t n%d = dim[%d]" % (dim, dim)) - - if not cdef_output: - return '\n'.join(cdefs) + '\n' - - # cdef initialize output - if is_reducing_function: - if (ndim > 1) and (axis is not None): - idx = list(range(ndim)) - del idx[axis] - ns = ', '.join(['n'+str(i) for i in idx]) - cdefs.append("%scdef np.npy_intp *dims = [%s]" % (tab, ns)) - y = "%scdef np.ndarray[np.%s_t, ndim=%d] " - y += "y = PyArray_EMPTY(%d, dims," - y += "\n NPY_%s, 0)" - cdefs.append(y % (tab, dtype, ndim-1, ndim-1, dtype)) - else: - idx = list(range(ndim)) - ns = ', '.join('n'+str(i) for i in idx) - cdefs.append("%scdef np.npy_intp *dims = [%s]" % (tab, ns)) - y = "%scdef np.ndarray[np.%s_t, ndim=%d] " - y += "y = PyArray_EMPTY(%d, dims," - y += "\n NPY_%s, 0)" - cdefs.append(y % (tab, dtype, ndim, ndim, dtype)) - - return '\n'.join(cdefs) + '\n' - -class Selector(object): - "String of code for dictionary that maps dtype to cython function." - - def __init__(self, name): - self.name = name - self.data = [] - - def append(self, ndim, dtype, axis, reuse=False): - self.data.append((ndim, dtype, axis, reuse)) - - def __str__(self): - fmt = "%s_dict[(%s, NPY_%s, %s)] = %s_%sd_%s_axis%s" - src = [] - src.append("cdef dict %s_dict = {}" % self.name) - for ndim, dtype, axis, reuse in self.data: - name = self.name - if reuse: - name = name.replace('nan', '') - if (ndim == 1) and (axis is None): - tup = (self.name, str(ndim), str(dtype), str(0), - name, str(ndim), str(dtype), str(axis)) - src.append(fmt % tup) - tup = (self.name, str(ndim), str(dtype), str(axis), - name, str(ndim), str(dtype), str(axis)) - src.append(fmt % tup) - return '\n'.join(src) - -def slow_selector(name, maxaxis=32): - "String of code for slow function mapping dictionary." 
- axes = list(range(maxaxis+1)) + [None] - src = ['\n'] - src.append("cdef dict %s_slow_dict = {}" % name) - fmt = "%s_slow_dict[%s] = %s_slow_axis%s" - for axis in axes: - tup = 2 * (name, str(axis)) - src.append(fmt % tup) - return '\n'.join(src) - -def slow_functions(name, signature, func, maxaxis=32): - "String of code for slow functions." - axes = list(range(maxaxis+1)) + [None] - tab = ' ' - sig = "def %s_slow_axis%s(%s):" - doc = '%s"Unaccelerated (slow) %s along axis %s."' - function = "%sreturn %s\n" - src = ['\n'] - for axis in axes: - - axis = str(axis) - - # signature - code = sig % (name, axis, signature) - code = code.replace('AXIS', axis) - src.append(code) - - # docstring - code = doc % (tab, name, axis) - code = code.replace('AXIS', axis) - src.append(code) - - # function - code = function % (tab, func) - code = code.replace('AXIS', axis) - src.append(code) - - return '\n'.join(src) diff --git a/pandas/src/replace.pyx b/pandas/src/replace.pyx deleted file mode 100644 index 4547d368059de..0000000000000 --- a/pandas/src/replace.pyx +++ /dev/null @@ -1,575 +0,0 @@ -"replace auto-generated from template" - -def replace(arr, old, new): - """ - Replace (inplace) given scalar values of an array with new values. - - similar to putmask but faster - - Parameters - ---------- - arr : numpy.ndarray - The input array, which is also the output array since this functions - works inplace. - old : scalar - new : scalar - All masked elements in `arr` will be replaced by `new`. - - Returns - ------- - None, the operation is inplace. - """ - func = replace_selector(arr) - if np.isscalar(old): - return func(arr, old, new) - else: - for o in old: - func(arr, o, new) - return arr - -def replace_selector(arr): - """ - Return replace function and array that matches `arr`. - - Under the hood Bottleneck uses a separate replace() Cython function for - each combination of ndim and dtype. A lot of the overhead in bn.replace() - is inselecting the low level function to use. - - You can get rid of the overhead by doing all this before you, for example, - enter an inner loop, by using this function. - - Parameters - ---------- - arr : numpy.ndarray - Input array. - - Returns - ------- - func : function - The replace() function that matches the number of dimensions and dtype - of the input array. - """ - axis = None - if type(arr) is not np.ndarray: - raise TypeError("`arr` must be a numpy array.") - cdef int ndim = PyArray_NDIM(arr) - cdef int dtype = PyArray_TYPE(arr) - cdef tuple key = (ndim, dtype, axis) - try: - func = replace_dict[key] - except KeyError: - try: - func = replace_slow_dict[axis] - except KeyError: - tup = (str(ndim), str(arr.dtype), str(axis)) - raise TypeError("Unsupported ndim/dtype/axis (%s/%s/%s)." % tup) - return func - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_1d_int32_axisNone(np.ndarray[np.int32_t, ndim=1] a, - double old, double new): - "replace (inplace) specified elements of 1d array of dtype=int32." 
- cdef np.int32_t ai - - cdef np.int32_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - cdef Py_ssize_t i0 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - if old==old: - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - for i0 in range(n0): - ai = a[i0] - if ai == old: - a[i0] = newint - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_1d_int64_axisNone(np.ndarray[np.int64_t, ndim=1] a, - double old, double new): - "replace (inplace) specified elements of 1d array of dtype=int64." - cdef np.int64_t ai - - cdef np.int64_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - cdef Py_ssize_t i0 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - if old==old: - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - for i0 in range(n0): - ai = a[i0] - if ai == old: - a[i0] = newint - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_2d_int32_axisNone(np.ndarray[np.int32_t, ndim=2] a, - double old, double new): - "replace (inplace) specified elements of 2d array of dtype=int32." - cdef np.int32_t ai - - cdef np.int32_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - cdef Py_ssize_t i0, i1 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - if old==old: - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - for i0 in range(n0): - for i1 in range(n1): - ai = a[i0, i1] - if ai == old: - a[i0, i1] = newint - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_2d_int64_axisNone(np.ndarray[np.int64_t, ndim=2] a, - double old, double new): - "replace (inplace) specified elements of 2d array of dtype=int64." - cdef np.int64_t ai - - cdef np.int64_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - cdef Py_ssize_t i0, i1 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - if old==old: - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - for i0 in range(n0): - for i1 in range(n1): - ai = a[i0, i1] - if ai == old: - a[i0, i1] = newint - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_3d_int32_axisNone(np.ndarray[np.int32_t, ndim=3] a, - double old, double new): - "replace (inplace) specified elements of 3d array of dtype=int32." 
- cdef np.int32_t ai - - cdef np.int32_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - cdef Py_ssize_t i0, i1, i2 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - cdef Py_ssize_t n2 = dim[2] - if old==old: - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - for i0 in range(n0): - for i1 in range(n1): - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai == old: - a[i0, i1, i2] = newint - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_3d_int64_axisNone(np.ndarray[np.int64_t, ndim=3] a, - double old, double new): - "replace (inplace) specified elements of 3d array of dtype=int64." - cdef np.int64_t ai - - cdef np.int64_t oldint, newint - newint = new - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - cdef Py_ssize_t i0, i1, i2 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - cdef Py_ssize_t n2 = dim[2] - if old==old: - oldint = old - newint = new - if oldint != old: - raise ValueError('Cannot safely cast `old` to int.') - if newint != new: - raise ValueError('Cannot safely cast `new` to int.') - for i0 in range(n0): - for i1 in range(n1): - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai == old: - a[i0, i1, i2] = newint - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_1d_float32_axisNone(np.ndarray[np.float32_t, ndim=1] a, - double old, double new): - "replace (inplace) specified elements of 1d array of dtype=float32." - cdef np.float32_t ai - cdef Py_ssize_t i0 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - if old==old: - for i0 in range(n0): - ai = a[i0] - if ai == old: - a[i0] = new - else: - for i0 in range(n0): - ai = a[i0] - if ai != ai: - a[i0] = new - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_1d_float64_axisNone(np.ndarray[np.float64_t, ndim=1] a, - double old, double new): - "replace (inplace) specified elements of 1d array of dtype=float64." - cdef np.float64_t ai - cdef Py_ssize_t i0 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - if old==old: - for i0 in range(n0): - ai = a[i0] - if ai == old: - a[i0] = new - else: - for i0 in range(n0): - ai = a[i0] - if ai != ai: - a[i0] = new - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_2d_float32_axisNone(np.ndarray[np.float32_t, ndim=2] a, - double old, double new): - "replace (inplace) specified elements of 2d array of dtype=float32." - cdef np.float32_t ai - cdef Py_ssize_t i0, i1 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - if old==old: - for i0 in range(n0): - for i1 in range(n1): - ai = a[i0, i1] - if ai == old: - a[i0, i1] = new - else: - for i0 in range(n0): - for i1 in range(n1): - ai = a[i0, i1] - if ai != ai: - a[i0, i1] = new - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_2d_float64_axisNone(np.ndarray[np.float64_t, ndim=2] a, - double old, double new): - "replace (inplace) specified elements of 2d array of dtype=float64." 
- cdef np.float64_t ai - cdef Py_ssize_t i0, i1 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - if old==old: - for i0 in range(n0): - for i1 in range(n1): - ai = a[i0, i1] - if ai == old: - a[i0, i1] = new - else: - for i0 in range(n0): - for i1 in range(n1): - ai = a[i0, i1] - if ai != ai: - a[i0, i1] = new - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_3d_float32_axisNone(np.ndarray[np.float32_t, ndim=3] a, - double old, double new): - "replace (inplace) specified elements of 3d array of dtype=float32." - cdef np.float32_t ai - cdef Py_ssize_t i0, i1, i2 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - cdef Py_ssize_t n2 = dim[2] - if old==old: - for i0 in range(n0): - for i1 in range(n1): - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai == old: - a[i0, i1, i2] = new - else: - for i0 in range(n0): - for i1 in range(n1): - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai != ai: - a[i0, i1, i2] = new - -@cython.boundscheck(False) -@cython.wraparound(False) -def replace_3d_float64_axisNone(np.ndarray[np.float64_t, ndim=3] a, - double old, double new): - "replace (inplace) specified elements of 3d array of dtype=float64." - cdef np.float64_t ai - cdef Py_ssize_t i0, i1, i2 - cdef np.npy_intp *dim - dim = PyArray_DIMS(a) - cdef Py_ssize_t n0 = dim[0] - cdef Py_ssize_t n1 = dim[1] - cdef Py_ssize_t n2 = dim[2] - if old==old: - for i0 in range(n0): - for i1 in range(n1): - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai == old: - a[i0, i1, i2] = new - else: - for i0 in range(n0): - for i1 in range(n1): - for i2 in range(n2): - ai = a[i0, i1, i2] - if ai != ai: - a[i0, i1, i2] = new - -cdef dict replace_dict = {} -replace_dict[(1, NPY_int32, 0)] = replace_1d_int32_axisNone -replace_dict[(1, NPY_int32, None)] = replace_1d_int32_axisNone -replace_dict[(1, NPY_int64, 0)] = replace_1d_int64_axisNone -replace_dict[(1, NPY_int64, None)] = replace_1d_int64_axisNone -replace_dict[(2, NPY_int32, None)] = replace_2d_int32_axisNone -replace_dict[(2, NPY_int64, None)] = replace_2d_int64_axisNone -replace_dict[(3, NPY_int32, None)] = replace_3d_int32_axisNone -replace_dict[(3, NPY_int64, None)] = replace_3d_int64_axisNone -replace_dict[(1, NPY_float32, 0)] = replace_1d_float32_axisNone -replace_dict[(1, NPY_float32, None)] = replace_1d_float32_axisNone -replace_dict[(1, NPY_float64, 0)] = replace_1d_float64_axisNone -replace_dict[(1, NPY_float64, None)] = replace_1d_float64_axisNone -replace_dict[(2, NPY_float32, None)] = replace_2d_float32_axisNone -replace_dict[(2, NPY_float64, None)] = replace_2d_float64_axisNone -replace_dict[(3, NPY_float32, None)] = replace_3d_float32_axisNone -replace_dict[(3, NPY_float64, None)] = replace_3d_float64_axisNone - -def replace_slow_axis0(arr, old, new): - "Unaccelerated (slow) replace along axis 0." - return slow_replace(arr, old, new) - -def replace_slow_axis1(arr, old, new): - "Unaccelerated (slow) replace along axis 1." - return slow_replace(arr, old, new) - -def replace_slow_axis2(arr, old, new): - "Unaccelerated (slow) replace along axis 2." - return slow_replace(arr, old, new) - -def replace_slow_axis3(arr, old, new): - "Unaccelerated (slow) replace along axis 3." - return slow_replace(arr, old, new) - -def replace_slow_axis4(arr, old, new): - "Unaccelerated (slow) replace along axis 4." - return slow_replace(arr, old, new) - -def replace_slow_axis5(arr, old, new): - "Unaccelerated (slow) replace along axis 5." 
- return slow_replace(arr, old, new) - -def replace_slow_axis6(arr, old, new): - "Unaccelerated (slow) replace along axis 6." - return slow_replace(arr, old, new) - -def replace_slow_axis7(arr, old, new): - "Unaccelerated (slow) replace along axis 7." - return slow_replace(arr, old, new) - -def replace_slow_axis8(arr, old, new): - "Unaccelerated (slow) replace along axis 8." - return slow_replace(arr, old, new) - -def replace_slow_axis9(arr, old, new): - "Unaccelerated (slow) replace along axis 9." - return slow_replace(arr, old, new) - -def replace_slow_axis10(arr, old, new): - "Unaccelerated (slow) replace along axis 10." - return slow_replace(arr, old, new) - -def replace_slow_axis11(arr, old, new): - "Unaccelerated (slow) replace along axis 11." - return slow_replace(arr, old, new) - -def replace_slow_axis12(arr, old, new): - "Unaccelerated (slow) replace along axis 12." - return slow_replace(arr, old, new) - -def replace_slow_axis13(arr, old, new): - "Unaccelerated (slow) replace along axis 13." - return slow_replace(arr, old, new) - -def replace_slow_axis14(arr, old, new): - "Unaccelerated (slow) replace along axis 14." - return slow_replace(arr, old, new) - -def replace_slow_axis15(arr, old, new): - "Unaccelerated (slow) replace along axis 15." - return slow_replace(arr, old, new) - -def replace_slow_axis16(arr, old, new): - "Unaccelerated (slow) replace along axis 16." - return slow_replace(arr, old, new) - -def replace_slow_axis17(arr, old, new): - "Unaccelerated (slow) replace along axis 17." - return slow_replace(arr, old, new) - -def replace_slow_axis18(arr, old, new): - "Unaccelerated (slow) replace along axis 18." - return slow_replace(arr, old, new) - -def replace_slow_axis19(arr, old, new): - "Unaccelerated (slow) replace along axis 19." - return slow_replace(arr, old, new) - -def replace_slow_axis20(arr, old, new): - "Unaccelerated (slow) replace along axis 20." - return slow_replace(arr, old, new) - -def replace_slow_axis21(arr, old, new): - "Unaccelerated (slow) replace along axis 21." - return slow_replace(arr, old, new) - -def replace_slow_axis22(arr, old, new): - "Unaccelerated (slow) replace along axis 22." - return slow_replace(arr, old, new) - -def replace_slow_axis23(arr, old, new): - "Unaccelerated (slow) replace along axis 23." - return slow_replace(arr, old, new) - -def replace_slow_axis24(arr, old, new): - "Unaccelerated (slow) replace along axis 24." - return slow_replace(arr, old, new) - -def replace_slow_axis25(arr, old, new): - "Unaccelerated (slow) replace along axis 25." - return slow_replace(arr, old, new) - -def replace_slow_axis26(arr, old, new): - "Unaccelerated (slow) replace along axis 26." - return slow_replace(arr, old, new) - -def replace_slow_axis27(arr, old, new): - "Unaccelerated (slow) replace along axis 27." - return slow_replace(arr, old, new) - -def replace_slow_axis28(arr, old, new): - "Unaccelerated (slow) replace along axis 28." - return slow_replace(arr, old, new) - -def replace_slow_axis29(arr, old, new): - "Unaccelerated (slow) replace along axis 29." - return slow_replace(arr, old, new) - -def replace_slow_axis30(arr, old, new): - "Unaccelerated (slow) replace along axis 30." - return slow_replace(arr, old, new) - -def replace_slow_axis31(arr, old, new): - "Unaccelerated (slow) replace along axis 31." - return slow_replace(arr, old, new) - -def replace_slow_axis32(arr, old, new): - "Unaccelerated (slow) replace along axis 32." 
- return slow_replace(arr, old, new) - -def replace_slow_axisNone(arr, old, new): - "Unaccelerated (slow) replace along axis None." - return slow_replace(arr, old, new) - - -cdef dict replace_slow_dict = {} -replace_slow_dict[0] = replace_slow_axis0 -replace_slow_dict[1] = replace_slow_axis1 -replace_slow_dict[2] = replace_slow_axis2 -replace_slow_dict[3] = replace_slow_axis3 -replace_slow_dict[4] = replace_slow_axis4 -replace_slow_dict[5] = replace_slow_axis5 -replace_slow_dict[6] = replace_slow_axis6 -replace_slow_dict[7] = replace_slow_axis7 -replace_slow_dict[8] = replace_slow_axis8 -replace_slow_dict[9] = replace_slow_axis9 -replace_slow_dict[10] = replace_slow_axis10 -replace_slow_dict[11] = replace_slow_axis11 -replace_slow_dict[12] = replace_slow_axis12 -replace_slow_dict[13] = replace_slow_axis13 -replace_slow_dict[14] = replace_slow_axis14 -replace_slow_dict[15] = replace_slow_axis15 -replace_slow_dict[16] = replace_slow_axis16 -replace_slow_dict[17] = replace_slow_axis17 -replace_slow_dict[18] = replace_slow_axis18 -replace_slow_dict[19] = replace_slow_axis19 -replace_slow_dict[20] = replace_slow_axis20 -replace_slow_dict[21] = replace_slow_axis21 -replace_slow_dict[22] = replace_slow_axis22 -replace_slow_dict[23] = replace_slow_axis23 -replace_slow_dict[24] = replace_slow_axis24 -replace_slow_dict[25] = replace_slow_axis25 -replace_slow_dict[26] = replace_slow_axis26 -replace_slow_dict[27] = replace_slow_axis27 -replace_slow_dict[28] = replace_slow_axis28 -replace_slow_dict[29] = replace_slow_axis29 -replace_slow_dict[30] = replace_slow_axis30 -replace_slow_dict[31] = replace_slow_axis31 -replace_slow_dict[32] = replace_slow_axis32 -replace_slow_dict[None] = replace_slow_axisNone \ No newline at end of file diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 03644d809b9e2..18bdd8f6644da 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -690,68 +690,6 @@ def value_count_int64(ndarray[int64_t] values): return result_keys, result_counts -def array_isnull(arr): - if np.isscalar(arr) or arr is None: - return _checknull(arr) - if arr.dtype.kind in ('O', 'S'): - # Working around NumPy ticket 1542 - shape = arr.shape - result = np.empty(shape, dtype=bool) - vec = isnullobj(arr.ravel()) - result[:] = vec.reshape(shape) - elif arr.dtype == np.datetime64: - # this is the NaT pattern - result = np.array(arr).view('i8') == NaT - else: - result = -np.isfinite(arr) - return result - -def typed_null_check(obj, arr): - if np.isscalar(arr) or arr is None: - return _checknull(obj) - if arr.dtype.kind in ('O', 'S'): - # Working around NumPy ticket 1542 - if np.isscalar(obj): - result = isnullobj(np.array([obj], dtype=object)) - else: - result = isnullobj(np.array(obj, dtype=object)) - elif arr.dtype == np.datetime64: - # this is the NaT pattern - result = obj == NaT - else: - result = -np.isfinite(obj) - return result - -def slow_replace(arr, old, new): - "Slow replace (inplace) used for unaccelerated ndim/dtype combinations." 
- if not isinstance(arr, np.ndarray): - raise TypeError("`arr` must be a numpy array.") - - if np.isscalar(old) or old is None: - if typed_null_check(old, arr): - mask = array_isnull(arr) - else: - if arr.dtype == np.datetime64: - mask = np.array(arr).view('i8') == old - else: - mask = arr == old - else: - mask = None - old_null = typed_null_check(old, arr) - others = old[-old_null] - if len(others) > 1: - mask = ismember(arr, set(others)) - elif len(others) == 1: - if arr.dtype == np.datetime64: - mask = np.array(arr).view('i8') == others[0] - else: - mask = arr == others[0] - if old_null.any(): - null_mask = array_isnull(arr) - mask = null_mask if mask is None else (null_mask | mask) - - np.putmask(arr, mask, new) - include "hashtable.pyx" include "datetime.pyx" include "skiplist.pyx" From 245c1264328054ba6e40e01ec5229adbad399115 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 14:44:18 -0400 Subject: [PATCH 088/114] moved mask_missing to common --- pandas/core/series.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7957954fa9130..03ec825034e2d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2238,7 +2238,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} raise ValueError('must specify a fill method') fill_f = _get_fill_func(method) - mask = _mask_missing(result, to_replace) + mask = com.mask_missing(result, to_replace) fill_f(result.values, limit=limit, mask=mask) if not inplace: @@ -2709,16 +2709,6 @@ def _get_fill_func(method): fill_f = com.backfill_1d return fill_f -def _mask_missing(series, missing_values): - missing_values = np.array(list(missing_values), dtype=object) - if isnull(missing_values).any(): - missing_values = missing_values[notnull(missing_values)] - mask = isnull(series) | series.isin(missing_values) - else: - mask = series.isin(missing_values) - return mask - - #---------------------------------------------------------------------- # Add plotting methods to Series From 35220b4da6ab1b3e23cf531a2c43b8f7f0d15e83 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 14:53:43 -0400 Subject: [PATCH 089/114] TST: extra test case for Series.replace --- pandas/tests/test_series.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 6ea5b7d94ae09..1ae6a3edcdf79 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2584,6 +2584,9 @@ def test_replace(self): self.assert_((ser[6:10] == -1).all()) self.assert_((ser[20:30] == -1).all()) + ser = Series([np.nan, 0, np.inf]) + assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + ser = Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT]) assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) filled = ser.copy() From 40a0cb15d940301050c3ed1f45cfcf044d38da2e Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 16:20:39 -0400 Subject: [PATCH 090/114] removed remaining references to replace code generation --- pandas/src/generate_code.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index eb458dd8508e3..77c4469958632 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -939,14 +939,6 @@ def generate_from_template(template, ndim=1, exclude=None): take_2d_axis1_template, take_2d_multi_template] - -# templates_1d_datetime = [take_1d_template] -# templates_2d_datetime = [take_2d_axis0_template, -# 
take_2d_axis1_template] -def codegen_pyx(funcs): - for func in funcs: - pyx_template(funcs[func]) - def generate_take_cython_file(path='generated.pyx'): with open(path, 'w') as f: print >> f, header @@ -966,10 +958,5 @@ def generate_take_cython_file(path='generated.pyx'): for template in nobool_1d_templates: print >> f, generate_from_template(template, exclude=['bool']) - print >> f, generate_ensure_dtypes() - - # print >> f, generate_put_functions() - codegen_pyx({'replace': replace}) - if __name__ == '__main__': generate_take_cython_file() From 76355d0b9cb198976bd55f9e5d93c5658de28a50 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 16:31:36 -0400 Subject: [PATCH 091/114] DOC: release note re: #929 --- RELEASE.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE.rst b/RELEASE.rst index 61e10b964a895..5b1327302cd7f 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -51,6 +51,8 @@ pandas 0.8.0 - New ordered_merge functions for merging DataFrames with ordered data. Also supports group-wise merging for panel data (#813) - Add keys() method to DataFrame + - Add flexible replace method for replacing potentially values to Series and + DataFrame (#929, #1241) **Improvements to existing features** From 927d370b1b8c92f3a7fc469ac59d9b4ff9f82813 Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 17 May 2012 11:45:21 -0300 Subject: [PATCH 092/114] Removed erroneous reference to iterating over a Series, which iterates over values and not keys --- doc/source/basics.rst | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c038f5e953cb2..3dfc934e80185 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -491,7 +491,7 @@ With a DataFrame, you can simultaneously reindex the index and columns: df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one']) For convenience, you may utilize the ``reindex_axis`` method, which takes the -labels and a keyword ``axis`` paramater. +labels and a keyword ``axis`` parameter. Note that the ``Index`` objects containing the actual axis labels can be **shared** between objects. So if we have a Series and a DataFrame, the @@ -657,7 +657,7 @@ set of labels from an axis: df.drop(['a', 'd'], axis=0) df.drop(['one'], axis=1) -Note that the following also works, but a bit less obvious / clean: +Note that the following also works, but is a bit less obvious / clean: .. ipython:: python @@ -685,13 +685,9 @@ Series, it need only contain a subset of the labels as keys: df.rename(columns={'one' : 'foo', 'two' : 'bar'}, index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'}) -The ``rename`` method also provides a ``copy`` named parameter that is by -default ``True`` and copies the underlying data. Pass ``copy=False`` to rename -the data in place. - .. _basics.rename_axis: -The Panel class has an a related ``rename_axis`` class which can rename any of +The Panel class has a related ``rename_axis`` class which can rename any of its three axes. 
Iteration @@ -700,7 +696,6 @@ Iteration Considering the pandas as somewhat dict-like structure, basic iteration produces the "keys" of the objects, namely: - * **Series**: the index label * **DataFrame**: the column labels * **Panel**: the item labels From b60c0d3d14a93ffaf605aa641cc73937258c20d4 Mon Sep 17 00:00:00 2001 From: Roy Hyunjin Han Date: Thu, 17 May 2012 15:27:08 -0300 Subject: [PATCH 093/114] Fixed a few typos --- doc/source/indexing.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 2a2614eddbba7..8e769f5194695 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -200,7 +200,7 @@ of the DataFrame): Consider the ``isin`` method of Series, which returns a boolean vector that is true wherever the Series elements exist in the passed list. This allows you to -select out rows where one or more columns have values you want: +select rows where one or more columns have values you want: .. ipython:: python @@ -215,7 +215,7 @@ more complex criteria: .. ipython:: python # only want 'two' or 'three' - criterion = df2['a'].map(lambda x: x.startswith('t') + criterion = df2['a'].map(lambda x: x.startswith('t')) df2[criterion] @@ -319,7 +319,7 @@ Duplicate Data .. _indexing.duplicate: -If you want to indentify and remove duplicate rows in a DataFrame, there are +If you want to identify and remove duplicate rows in a DataFrame, there are two methods that will help: ``duplicated`` and ``drop_duplicates``. Each takes as an argument the columns to use to identify duplicated rows. @@ -569,7 +569,7 @@ in the pandas 0.4 release. It is very exciting as it opens the door to some quite sophisticated data analysis and manipulation, especially for working with higher dimensional data. In essence, it enables you to effectively store and manipulate arbitrarily high dimension data in a 2-dimensional tabular structure -(DataFrame), for example. It is not limited to DataFrame +(DataFrame), for example. It is not limited to DataFrames. 
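
To make the reworded claim above concrete, a small sketch of storing higher-dimensional data in a 2d DataFrame through a two-level row index; the labels and the ``xs`` selection are illustrative and assume a reasonably recent pandas rather than the 0.8-era API documented here::

    import numpy as np
    import pandas as pd

    # Two index levels ('first', 'second') let a flat 2d table represent
    # what would otherwise need a 3d structure.
    arrays = [['bar', 'bar', 'baz', 'baz'], ['one', 'two', 'one', 'two']]
    index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])
    df = pd.DataFrame(np.random.randn(4, 2), index=index, columns=['A', 'B'])

    # Cross-section: all rows under the outer label 'bar'.
    print(df.xs('bar'))
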
In this section, we will show what exactly we mean by "hierarchical" indexing and how it integrates with the all of the pandas indexing functionality From 49ad7e26adfc004caeaa85b1162206c9f8fe4004 Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 17 May 2012 14:55:20 -0400 Subject: [PATCH 094/114] TST: rephrased .keys call for py3compat --- doc/make.py | 6 ++++-- pandas/tests/test_frame.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/make.py b/doc/make.py index d8f2d9840cb68..98767ae67ce43 100755 --- a/doc/make.py +++ b/doc/make.py @@ -96,7 +96,7 @@ def all(): # clean() html() -def auto_dev_build(): +def auto_dev_build(debug=False): msg = '' try: clean() @@ -104,7 +104,8 @@ def auto_dev_build(): latex() upload_dev() upload_dev_pdf() - sendmail() + if not debug: + sendmail() except (Exception, SystemExit), inst: msg += str(inst) + '\n' sendmail(msg) @@ -178,6 +179,7 @@ def _get_config(): 'latex' : latex, 'clean' : clean, 'auto_dev' : auto_dev_build, + 'auto_debug' : lambda: auto_dev_build(True), 'all' : all, } diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 642da36ac598b..90c5e8f3e3565 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1323,7 +1323,8 @@ def test_set_columns(self): cols[::2]) def test_keys(self): - self.assert_(self.frame.keys() is self.frame.columns) + getkeys = self.frame.keys + self.assert_(getkeys() is self.frame.columns) def test_column_contains_typeerror(self): try: From 421f5d3e62cad758f7a2c178026bec4a1aafc769 Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 17 May 2012 19:20:39 -0400 Subject: [PATCH 095/114] DOC: put back doc regarding inplace in rename in anticipation of feature --- doc/source/basics.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 3dfc934e80185..014bf7ea58f8a 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -685,6 +685,10 @@ Series, it need only contain a subset of the labels as keys: df.rename(columns={'one' : 'foo', 'two' : 'bar'}, index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'}) +The ``rename`` method also provides an ``inplace`` named parameter that is by +default ``False`` and copies the underlying data. Pass ``inplace=True`` to +rename the data in place. + .. _basics.rename_axis: The Panel class has a related ``rename_axis`` class which can rename any of @@ -693,11 +697,13 @@ its three axes. Iteration --------- -Considering the pandas as somewhat dict-like structure, basic iteration -produces the "keys" of the objects, namely: +Because Series is array-like, basic iteration produces the values. Other data +structures follow the dict-like convention of iterating over the "keys" of the +objects. 
In short: - * **DataFrame**: the column labels - * **Panel**: the item labels + * **Series**: values + * **DataFrame**: column labels + * **Panel**: item labels Thus, for example: From 181f9451ab9bcf95554fc8847e78596eb94b790f Mon Sep 17 00:00:00 2001 From: Chang She Date: Thu, 17 May 2012 19:33:57 -0400 Subject: [PATCH 096/114] DOC: reworded description for MultiIndex --- doc/source/indexing.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 8e769f5194695..c2ef0d74ced53 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -567,9 +567,9 @@ Hierarchical indexing (MultiIndex) Hierarchical indexing (also referred to as "multi-level" indexing) is brand new in the pandas 0.4 release. It is very exciting as it opens the door to some quite sophisticated data analysis and manipulation, especially for working with -higher dimensional data. In essence, it enables you to effectively store and -manipulate arbitrarily high dimension data in a 2-dimensional tabular structure -(DataFrame), for example. It is not limited to DataFrames. +higher dimensional data. In essence, it enables you to store and manipulate +data with an arbitrary number of dimensions in lower dimensional data +structures like Series (1d) and DataFrame (2d). In this section, we will show what exactly we mean by "hierarchical" indexing and how it integrates with the all of the pandas indexing functionality From fb1e66242d7da5fcc1b43f1d0296ccdb7b05e286 Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 18 May 2012 12:54:42 -0400 Subject: [PATCH 097/114] DOC: started on timeseries.rst for 0.8 --- doc/source/computation.rst | 8 +- doc/source/timeseries.rst | 180 +++++++++++++++++++++++++++---------- 2 files changed, 139 insertions(+), 49 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 25be861295395..f058eab89d067 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -171,10 +171,10 @@ accept the following arguments: - ``window``: size of moving window - ``min_periods``: threshold of non-null data points to require (otherwise result is NA) - - ``freq``: optionally specify a :ref: `frequency string ` or :ref:`DateOffset ` - to pre-conform the data to. Note that prior to pandas v0.8.0, a keyword - argument ``time_rule`` was used instead of ``freq`` that referred to - the legacy time rule constants + - ``freq``: optionally specify a :ref: `frequency string ` + or :ref:`DateOffset ` to pre-conform the data to. + Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used + instead of ``freq`` that referred to the legacy time rule constants These functions can be applied to ndarrays or Series objects: diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c355c2fb3f1fb..14629412c783a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -4,28 +4,29 @@ .. ipython:: python :suppress: + from datetime import datetime import numpy as np np.random.seed(123456) from pandas import * randn = np.random.randn np.set_printoptions(precision=4, suppress=True) from dateutil import relativedelta - from pandas.core.datetools import * + from pandas.tseries.api import * ******************************** Time Series / Date functionality ******************************** pandas has proven very successful as a tool for working with time series data, -especially in the financial data analysis space. 
Over the coming year we will -be looking to consolidate the various Python libraries for time series data, -e.g. ``scikits.timeseries``, using the new NumPy ``datetime64`` dtype, to -create a very nice integrated solution. Everything in pandas at the moment is -based on using Python ``datetime`` objects. +especially in the financial data analysis space. With the 0.8 release, we have +further improved the time series API in pandas by leaps and bounds. Using the +new NumPy ``datetime64`` dtype, we have consolidated a large number of features +from other Python libraries like ``scikits.timeseries`` as well as created +a tremendous amount of new functionality for manipulating time series data. In working with time series data, we will frequently seek to: - - generate sequences of fixed-frequency dates + - generate sequences of fixed-frequency dates and time spans - conform or convert time series to a particular frequency - compute "relative" dates based on various non-standard time increments (e.g. 5 business days before the last business day of the year), or "roll" @@ -34,18 +35,85 @@ In working with time series data, we will frequently seek to: pandas provides a relatively compact and self-contained set of tools for performing the above tasks. -.. note:: +.. _timeseries.representation: + +Time Stamps vs. Time Spans +-------------------------- + +While most time series representations of data associates values with a time +stamp, in many cases it is more natural to associate the values with a given +time span. For example, it is easy to think of level variables at a +particular point in time, but much more intuitive to think of change variables +over spans of time. Starting with 0.8, pandas allows you to capture both +representations and convert between them. Under the hood, pandas represents +timestamps using instances of ``Timestamp`` and sequences of timestamps using +instances of ``DatetimeIndex``. For regular time spans, pandas uses ``Period`` +objects for scalar values and ``PeriodIndex`` for sequences of spans. +Better support for irregular intervals with arbitrary start and end points are +forth-coming in future releases. + +For example: + +.. ipython:: python + + # Time stamped data + dates = [datetime(2012, 5, 1), datetime(2012, 5, 2), datetime(2012, 5, 3)] + ts = Series(np.random.randn(3), dates) + + type(ts.index) + + ts + + # Time span data + periods = PeriodIndex([Period('2012-01'), Period('2012-02'), + Period('2012-03')]) + ts = Series(np.random.randn(3), periods) + + type(ts.index) + + ts + +.. _timeseries.timestamprange: + +Generating Ranges of Timestamps +------------------------------- + +To generate an index with time stamps, you can use either the DatetimeIndex or +Index constructor and pass in a list of datetime objects: - This area of pandas has gotten less development attention recently, though - this should change in the near future. +.. ipython:: python + + dates = [datetime(2012, 5, 1), datetime(2012, 5, 2), datetime(2012, 5, 3)] + index = DatetimeIndex(dates) + index # Note the frequency information + + index = Index(dates) + index # Automatically converted to DatetimeIndex + +Practically, this becomes very cumbersome because we often need a very long +index with a large number of timestamps. If we need timestamps on a regular +frequency, we can use the pandas functions ``date_range`` and ``bdate_range`` +to create timestamp indexes. + +.. 
ipython:: python + + index = date_range('2000-1-1', periods=1000, freq='M') + index + + index = bdate_range('2012-1-1', periods=250) + index .. _timeseries.offsets: DateOffset objects ------------------ -A ``DateOffset`` instance represents a frequency increment. Different offset -logic via subclasses: +In order to create the sequence of dates with a monthly frequency in the +previous example, we used the ``freq`` keyword and gave it 'M' as the input. +Under the hood, the string 'M' is being interpreted into an instance of pandas +``DateOffset``. ``DateOffset`` represents a regular frequency increment. +Specific offset logic like "business day" or "one hour" is represented in its +various subclasses. .. csv-table:: :header: "Class name", "Description" @@ -54,16 +122,24 @@ logic via subclasses: DateOffset, "Generic offset class, defaults to 1 calendar day" BDay, "business day (weekday)" Week, "one week, optionally anchored on a day of the week" + WeekOfMonth, "the x-th day of the y-th week of each month" MonthEnd, "calendar month end" + MonthBegin, "calendar month begin" BMonthEnd, "business month end" + BMonthBegin, "business month begin" QuarterEnd, "calendar quarter end" + QuarterBegin, "calendar quarter begin" BQuarterEnd, "business quarter end" + BQuarterBegin, "business quarter begin" YearEnd, "calendar year end" YearBegin, "calendar year begin" BYearEnd, "business year end" + BYearBegin, "business year begin" Hour, "one hour" Minute, "one minute" Second, "one second" + Milli, "one millisecond" + Micro, "one microsecond" The basic ``DateOffset`` takes the same arguments as ``dateutil.relativedelta``, which works like: @@ -113,7 +189,7 @@ The ``rollforward`` and ``rollback`` methods do exactly what you would expect: offset.rollforward(d) offset.rollback(d) -It's definitely worth exploring the ``pandas.core.datetools`` module and the +It's definitely worth exploring the ``pandas.tseries.offsets`` module and the various docstrings for the classes. Parametric offsets @@ -130,7 +206,14 @@ particular day of the week: d + Week(weekday=4) (d + Week(weekday=4)).weekday() -.. _timeseries.freq: +Another example is parameterizing ``YearEnd`` with the specific ending month: + +.. ipython:: python + + d + YearEnd() + d + YearEnd(month=6) + +.. _timeseries.alias: Offset Aliases ~~~~~~~~~~~~~~ @@ -202,9 +285,9 @@ For some frequencies you can specify an anchoring suffix: "(B)A(S)\-OCT", "annual frequency, anchored end of October" "(B)A(S)\-NOV", "annual frequency, anchored end of November" -These can be used as arguments to ``date_range``, ``period_range``, constructors -for ``PeriodIndex`` and ``DatetimeIndex``, as well as various other time -series-related functions in pandas. +These can be used as arguments to ``date_range``, ``bdate_range``, constructors +for ``DatetimeIndex``, as well as various other timeseries-related functions +in pandas. Note that prior to v0.8.0, time rules had a slightly different look. Pandas will continue to support the legacy time rules for the time being but it is @@ -242,56 +325,63 @@ strongly recommended that you switch to using the new offset aliases. "ms", "L" "us": "U" -Note that the legacy quarterly and annual frequencies are business quarter and -business year ends. Also note the legacy time rule for milliseconds ``ms`` -versus the new offset alias for month start ``MS``. This means that offset -alias parsing is case sensitive. +As you can see, legacy quarterly and annual frequencies are business quarter +and business year ends. 
Please also note the legacy time rule for milliseconds +``ms`` versus the new offset alias for month start ``MS``. This means that +offset alias parsing is case sensitive. .. _timeseries.daterange: -Generating date ranges (date_range) ------------------------------------ +More on date ranges +------------------- -The ``date_range`` class utilizes these offsets (and any ones that we might add) -to generate fixed-frequency date ranges: +Convenience functions like ``date_range`` and ``bdate_range`` utilizes the +offsets described above to generate fixed-frequency date ranges. The default +frequency for ``date_range`` is a **calendar day** while the default for +``bdate_range`` is a **business day** .. ipython:: python start = datetime(2009, 1, 1) end = datetime(2010, 1, 1) - rng = date_range(start, end, freq=BDay()) + rng = date_range(start, end) + rng + + rng = bdate_range(start, end) rng + +``date_range`` and ``bdate_range`` makes it easy to generate a range of dates +using various combinations of its parameters like ``start``, ``end``, +``periods``, and ``freq``: + date_range(start, end, freq=BMonthEnd()) -**Business day frequency** is the default for ``date_range``. You can also -strictly generate a ``date_range`` of a certain length by providing either a -start or end date and a ``periods`` argument: + date_range(start, end, freq=3 * Week()) -.. ipython:: python + bdate_range(end=end, periods=20) - date_range(start, periods=20) - date_range(end=end, periods=20) + bdate_range(start=start, periods=20) The start and end dates are strictly inclusive. So it will not generate any dates outside of those dates if specified. -date_range is a valid Index -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -One of the main uses for ``date_range`` is as an index for pandas objects. When -working with a lot of time series data, there are several reasons to use -``date_range`` objects when possible: +DatetimeIndex +~~~~~~~~~~~~~ + +One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. +The ``DatetimeIndex`` class contains many timeseries related optimizations: - A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast (just have to grab a slice) - - Fast shifting using the ``shift`` method on pandas objects - - Unioning of overlapping date_range objects with the same frequency is very - fast (important for fast data alignment) + - Fast shifting using the ``shift`` and ``tshift`` method on pandas objects + - Unioning of overlapping DatetimeIndex objects with the same frequency is + very fast (important for fast data alignment) -The ``date_range`` is a valid index and can even be intelligent when doing -slicing, etc. +``DatetimeIndex`` can be used like a regular index and offers all of its +intelligent functionality like selection, slicing, etc. .. ipython:: python @@ -301,8 +391,8 @@ slicing, etc. ts[:5].index ts[::2].index -More complicated fancy indexing will result in an ``Index`` that is no longer a -``date_range``, however: +However, complicated fancy indexing that breaks the DatetimeIndex's frequency +regularity will result in an ``Index`` that is no longer a ``DatetimeIndex``: .. ipython:: python @@ -335,7 +425,7 @@ and in Panel along the ``major_axis``. The shift method accepts an ``offset`` argument which can accept a ``DateOffset`` class or other ``timedelta``-like object or also a :ref:`time -rule `: +rule `: .. 
ipython:: python From d4407a9db85aeb04eb8acd34801df1d77e517240 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 18:16:20 -0400 Subject: [PATCH 098/114] REF: microsecond -> nanosecond migration, most of the way there #1238 --- pandas/core/common.py | 2 +- pandas/core/internals.py | 2 +- pandas/core/nanops.py | 2 +- pandas/io/pytables.py | 2 +- pandas/src/datetime.pyx | 81 +++++++++++------------- pandas/src/engines.pyx | 6 +- pandas/tests/test_tseries.py | 2 + pandas/tools/tests/test_merge.py | 2 +- pandas/tseries/frequencies.py | 35 ++++++----- pandas/tseries/index.py | 67 ++++++++++++-------- pandas/tseries/offsets.py | 10 +-- pandas/tseries/resample.py | 34 +++++----- pandas/tseries/tests/test_timeseries.py | 82 +++++++++++++++++-------- vb_suite/sparse.py | 2 +- 14 files changed, 186 insertions(+), 143 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 6e92e55f203de..2da212cbd3bfc 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -67,7 +67,7 @@ def isnull(obj): if isinstance(obj, Series): result = Series(result, index=obj.index, copy=False) - elif obj.dtype == np.datetime64: + elif obj.dtype == np.dtype('M8[ns]'): # this is the NaT pattern result = np.array(obj).view('i8') == lib.NaT else: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index cbd1ccfabdeb7..7e8e67274a0a4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1125,7 +1125,7 @@ def form_blocks(data, axes): if len(datetime_dict): datetime_block = _simple_blockify(datetime_dict, items, - np.dtype('M8[us]')) + np.dtype('M8[ns]')) blocks.append(datetime_block) if len(bool_dict): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8fb01d1a89e17..e742bdb55379a 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -406,7 +406,7 @@ def unique1d(values): dtype=np.int64) if values.dtype == np.datetime64: - uniques = uniques.view('M8[us]') + uniques = uniques.view('M8[ns]') else: table = lib.PyObjectHashTable(len(values)) uniques = table.unique(com._ensure_object(values)) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f41952d399a69..7ac5ad901b548 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -963,7 +963,7 @@ def _read_array(group, key): def _unconvert_index(data, kind): if kind == 'datetime64': - index = np.array(data, dtype='M8[us]') + index = np.array(data, dtype='M8[ns]') elif kind == 'datetime': index = np.array([datetime.fromtimestamp(v) for v in data], dtype=object) diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 4627e0bd8facd..f623376bd77a6 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -47,10 +47,9 @@ except NameError: # py3 # This serves as the box for datetime64 class Timestamp(_Timestamp): - __slots__ = ['value', 'offset'] - def __new__(cls, object ts_input, object offset=None, tz=None): cdef _TSObject ts + cdef _Timestamp ts_base if isinstance(ts_input, float): # to do, do we want to support this, ie with fractional seconds? 
@@ -72,6 +71,7 @@ class Timestamp(_Timestamp): # fill out rest of data ts_base.value = ts.value ts_base.offset = offset + ts_base.nanosecond = ts.dts.ps / 1000 return ts_base @@ -185,7 +185,7 @@ def apply_offset(ndarray[object] values, object offset): ndarray[int64_t] new_values object boxed - result = np.empty(n, dtype='M8[us]') + result = np.empty(n, dtype='M8[ns]') new_values = result.view('i8') pass @@ -194,8 +194,8 @@ def apply_offset(ndarray[object] values, object offset): # (see Timestamp class above). This will serve as a C extension type that # shadows the python class, where we do any heavy lifting. cdef class _Timestamp(datetime): - cdef: - int64_t value # numpy int64 + cdef public: + int64_t value, nanosecond object offset # frequency reference def __add__(self, other): @@ -250,13 +250,13 @@ cpdef convert_to_tsobject(object ts, object tz=None): if is_datetime64_object(ts): obj.value = unbox_datetime64_scalar(ts) - pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_us, &obj.dts) + pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) elif is_integer_object(ts): obj.value = ts - pandas_datetime_to_datetimestruct(ts, PANDAS_FR_us, &obj.dts) + pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) elif util.is_string_object(ts): _string_to_dts(ts, &obj.dts) - obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_us, &obj.dts) + obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts) elif PyDateTime_Check(ts): obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -280,7 +280,7 @@ cpdef convert_to_tsobject(object ts, object tz=None): obj.value = obj.value + deltas[pos] if utc_convert: - pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_us, + pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz._tzinfos[inf] @@ -297,7 +297,7 @@ cpdef convert_to_tsobject(object ts, object tz=None): cdef inline object _datetime64_to_datetime(int64_t val): cdef pandas_datetimestruct dts - pandas_datetime_to_datetimestruct(val, PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) return _dts_to_pydatetime(&dts) cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): @@ -313,7 +313,7 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.min = PyDateTime_DATE_GET_MINUTE(val) dts.sec = PyDateTime_DATE_GET_SECOND(val) dts.us = PyDateTime_DATE_GET_MICROSECOND(val) - return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _dtlike_to_datetime64(object val, pandas_datetimestruct *dts): @@ -324,7 +324,7 @@ cdef inline int64_t _dtlike_to_datetime64(object val, dts.min = val.minute dts.sec = val.second dts.us = val.microsecond - return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _date_to_datetime64(object val, pandas_datetimestruct *dts): @@ -335,7 +335,7 @@ cdef inline int64_t _date_to_datetime64(object val, dts.min = 0 dts.sec = 0 dts.us = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int _string_to_dts(object val, pandas_datetimestruct* dts) except -1: @@ -345,7 +345,7 @@ cdef inline int _string_to_dts(object val, pandas_datetimestruct* dts) except -1 if PyUnicode_Check(val): val = PyUnicode_AsASCIIString(val); - parse_iso_8601_datetime(val, len(val), PANDAS_FR_us, NPY_UNSAFE_CASTING, 
+ parse_iso_8601_datetime(val, len(val), PANDAS_FR_ns, NPY_UNSAFE_CASTING, dts, &islocal, &out_bestunit, &special) return 0 @@ -738,7 +738,7 @@ def string_to_datetime(ndarray[object] strings, raise_=False, dayfirst=False): from dateutil.parser import parse try: - result = np.empty(n, dtype='M8[us]') + result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') for i in range(n): val = strings[i] @@ -903,7 +903,7 @@ def _get_transitions(tz): Get UTC times of DST transitions """ if tz not in trans_cache: - arr = np.array(tz._utc_transition_times, dtype='M8[us]') + arr = np.array(tz._utc_transition_times, dtype='M8[ns]') trans_cache[tz] = arr.view('i8') return trans_cache[tz] @@ -1009,7 +1009,7 @@ def build_field_sarray(ndarray[int64_t] dtindex): mus = out['u'] for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) years[i] = dts.year months[i] = dts.month days[i] = dts.day @@ -1044,49 +1044,49 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): if field == 'Y': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.year return out elif field == 'M': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.month return out elif field == 'D': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.day return out elif field == 'h': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.hour return out elif field == 'm': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.min return out elif field == 's': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.sec return out elif field == 'us': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.us return out elif field == 'doy': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) isleap = is_leapyear(dts.year) out[i] = _month_offset[isleap, dts.month-1] + dts.day return out @@ -1099,7 +1099,7 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): elif field == 'woy': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) isleap = is_leapyear(dts.year) out[i] = _month_offset[isleap, dts.month - 1] + dts.day out[i] = ((out[i] - 1) / 7) + 1 @@ -1107,7 +1107,7 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field): elif field == 'q': for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) out[i] = dts.month out[i] = ((out[i] - 1) / 3) + 1 return out @@ -1119,7 +1119,7 @@ cdef inline int 
m8_weekday(int64_t val): ts = convert_to_tsobject(val) return ts_dayofweek(ts) -cdef int64_t DAY_US = 86400000000LL +cdef int64_t DAY_NS = 86400000000000LL def values_at_time(ndarray[int64_t] stamps, int64_t time): cdef: @@ -1133,18 +1133,14 @@ def values_at_time(ndarray[int64_t] stamps, int64_t time): return np.empty(0, dtype=np.int64) # is this OK? - # days = stamps // DAY_US - times = stamps % DAY_US + # days = stamps // DAY_NS + times = stamps % DAY_NS - # Microsecond resolution + # Nanosecond resolution count = 0 for i in range(1, n): if times[i] == time: count += 1 - # cur = days[i] - # if cur > last: - # count += 1 - # last = cur indexer = np.empty(count, dtype=np.int64) @@ -1155,11 +1151,6 @@ def values_at_time(ndarray[int64_t] stamps, int64_t time): indexer[j] = i j += 1 - # cur = days[i] - # if cur > last: - # j += 1 - # last = cur - return indexer @@ -1170,12 +1161,12 @@ def date_normalize(ndarray[int64_t] stamps): pandas_datetimestruct dts for i in range(n): - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dts.hour = 0 dts.min = 0 dts.sec = 0 dts.us = 0 - result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_us, &dts) + result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) return result @@ -1185,7 +1176,7 @@ def dates_normalized(ndarray[int64_t] stamps): pandas_datetimestruct dts for i in range(n): - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False @@ -1250,7 +1241,7 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq): out = np.empty(l, dtype='i8') for i in range(l): - pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_us, &dts) + pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) out[i] = get_period_ordinal(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, freq) return out @@ -1349,7 +1340,7 @@ cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): dts.sec = int(dinfo.second) dts.us = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_us, &dts) + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) def period_ordinal_to_string(int64_t value, int freq): cdef: diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index b465dc3707705..5c16ebb5fddc7 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -415,20 +415,20 @@ cdef class DatetimeEngine(Int64Engine): def get_indexer(self, values): self._ensure_mapping_populated() - if values.dtype != 'M8': + if values.dtype != 'M8[ns]': return np.repeat(-1, len(values)).astype('i4') values = np.asarray(values).view('i8') return self.mapping.lookup(values) def get_pad_indexer(self, other, limit=None): - if other.dtype != 'M8': + if other.dtype != 'M8[ns]': return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') return _algos.pad_int64(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): - if other.dtype != 'M8': + if other.dtype != 'M8[ns]': return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') return _algos.backfill_int64(self._get_index_values(), other, diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 318f782371f73..57f154384bf91 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -197,6 +197,8 @@ def test_maybe_booleans_to_slice(): result = 
lib.maybe_booleans_to_slice(arr) assert(result.dtype == np.bool_) + result = lib.maybe_booleans_to_slice(arr[:0]) + assert(result == slice(0, 0)) def test_convert_objects(): arr = np.array(['a', 'b', nan, nan, 'd', 'e', 'f'], dtype='O') diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 701acfddf5ea5..8253ad4e1e1db 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1198,7 +1198,7 @@ def test_concat_series(self): result = concat(pieces, keys=[0, 1, 2]) expected = ts.copy() - ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[us]')) + ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[ns]')) exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))] diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index fe198b10132ec..4501e1d6a6257 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -696,6 +696,12 @@ def infer_freq(index, warn=True): inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() +_ONE_MICRO = 1000L +_ONE_MILLI = _ONE_MICRO * 1000 +_ONE_SECOND = _ONE_MILLI * 1000 +_ONE_MINUTE = 60 * _ONE_SECOND +_ONE_HOUR = 60 * _ONE_MINUTE +_ONE_DAY = 24 * _ONE_HOUR class _FrequencyInferer(object): """ @@ -727,31 +733,34 @@ def is_monotonic(self): def get_freq(self): delta = self.deltas[0] - if _is_multiple(delta, _day_us): + if _is_multiple(delta, _ONE_DAY): return self._infer_daily_rule() else: # Possibly intraday frequency if not self.is_unique: return None - if _is_multiple(delta, 60 * 60 * 1000000): + if _is_multiple(delta, _ONE_HOUR): # Hours - return _maybe_add_count('H', delta / (60 * 60 * 1000000)) - elif _is_multiple(delta, 60 * 1000000): + return _maybe_add_count('H', delta / _ONE_HOUR) + elif _is_multiple(delta, _ONE_MINUTE): # Minutes - return _maybe_add_count('T', delta / (60 * 1000000)) - elif _is_multiple(delta, 1000000): + return _maybe_add_count('T', delta / _ONE_MINUTE) + elif _is_multiple(delta, _ONE_SECOND): # Seconds - return _maybe_add_count('S', delta / 1000000) - elif _is_multiple(delta, 1000): + return _maybe_add_count('S', delta / _ONE_SECOND) + elif _is_multiple(delta, _ONE_MILLI): # Milliseconds - return _maybe_add_count('L', delta / 1000) - else: + return _maybe_add_count('L', delta / _ONE_MILLI) + elif _is_multiple(delta, _ONE_MICRO): # Microseconds + return _maybe_add_count('L', delta / _ONE_MICRO) + else: + # Nanoseconds return _maybe_add_count('U', delta) @cache_readonly def day_deltas(self): - return [x / _day_us for x in self.deltas] + return [x / _ONE_DAY for x in self.deltas] @cache_readonly def fields(self): @@ -828,7 +837,7 @@ def _infer_daily_rule(self): return monthly_rule if self.is_unique: - days = self.deltas[0] / _day_us + days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly alias = _weekday_rule_aliases[self.rep_stamp.weekday()] @@ -990,5 +999,3 @@ def _is_weekly(rule): def _is_multiple(us, mult): return us % mult == 0 - -_day_us = 24 * 60 * 60 * 1000000 diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 36814876f4e17..4b3e639907b81 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1,3 +1,5 @@ +# pylint: disable=E1101 + from datetime import time, datetime from datetime import timedelta @@ -64,7 +66,7 @@ def wrapper(left, right): results = joinf(left, right) if with_indexers: join_index, left_indexer, right_indexer = results - join_index = join_index.view('M8') + join_index = join_index.view('M8[ns]') return join_index, 
left_indexer, right_indexer return results return wrapper @@ -128,7 +130,6 @@ class DatetimeIndex(Int64Index): ---------- data : array-like (1-dimensional), optional Optional datetime-like data to construct index with - dtype : NumPy dtype (default: M8[us]) copy : bool Make a copy of input ndarray freq : string or pandas offset object, optional @@ -169,7 +170,7 @@ class DatetimeIndex(Int64Index): def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, - dtype=None, copy=False, name=None, tz=None, + copy=False, name=None, tz=None, verify_integrity=True, normalize=False, **kwds): warn = False @@ -225,7 +226,7 @@ def __new__(cls, data=None, if lib.is_string_array(data): data = _str_to_dt_array(data, offset) else: - data = np.asarray(data, dtype='M8[us]') + data = np.asarray(data, dtype='M8[ns]') if issubclass(data.dtype.type, basestring): subarr = _str_to_dt_array(data, offset) @@ -235,11 +236,11 @@ def __new__(cls, data=None, offset = data.offset verify_integrity = False else: - subarr = np.array(data, dtype='M8[us]', copy=copy) + subarr = np.array(data, dtype='M8[ns]', copy=copy) elif issubclass(data.dtype.type, np.integer): - subarr = np.array(data, dtype='M8[us]', copy=copy) + subarr = np.array(data, dtype='M8[ns]', copy=copy) else: - subarr = np.array(data, dtype='M8[us]', copy=copy) + subarr = np.array(data, dtype='M8[ns]', copy=copy) if tz is not None: tz = tools._maybe_get_tz(tz) @@ -247,7 +248,7 @@ def __new__(cls, data=None, ints = subarr.view('i8') lib.tz_localize_check(ints, tz) subarr = lib.tz_convert(ints, tz, _utc()) - subarr = subarr.view('M8[us]') + subarr = subarr.view('M8[ns]') subarr = subarr.view(cls) subarr.name = name @@ -312,7 +313,7 @@ def _generate(cls, start, end, periods, name, offset, ints = index.view('i8') lib.tz_localize_check(ints, tz) index = lib.tz_convert(ints, tz, _utc()) - index = index.view('M8[us]') + index = index.view('M8[ns]') index = index.view(cls) index.name = name @@ -354,7 +355,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, end=_CACHE_END) arr = np.array(_to_m8_array(list(xdr)), - dtype='M8[us]', copy=False) + dtype='M8[ns]', copy=False) cachedRange = arr.view(DatetimeIndex) cachedRange.offset = offset @@ -448,7 +449,7 @@ def __setstate__(self, state): # extract the raw datetime data, turn into datetime64 index_state = state[0] raw_data = index_state[0][4] - raw_data = np.array(raw_data, dtype='M8[us]') + raw_data = np.array(raw_data, dtype='M8[ns]') new_state = raw_data.__reduce__() np.ndarray.__setstate__(self, new_state[2]) else: # pragma: no cover @@ -476,8 +477,8 @@ def __sub__(self, other): def _add_delta(self, delta): if isinstance(delta, (Tick, timedelta)): - inc = offsets._delta_to_microseconds(delta) - new_values = (self.asi8 + inc).view('M8[us]') + inc = offsets._delta_to_nanoseconds(delta) + new_values = (self.asi8 + inc).view('M8[ns]') else: new_values = self.astype('O') + delta return DatetimeIndex(new_values, tz=self.tz, freq='infer') @@ -496,6 +497,13 @@ def summary(self, name=None): return result + def astype(self, dtype): + dtype = np.dtype(dtype) + + if dtype == np.object_: + return self.asobject + return Index.astype(self, dtype) + @property def asi8(self): # do not cache or you'll create a memory leak @@ -545,7 +553,6 @@ def order(self, return_indexer=False, ascending=True): return self._simple_new(sorted_values, self.name, None, self.tz) - def snap(self, freq='S'): """ Snap time stamps to nearest occuring frequency @@ -554,7 +561,7 @@ def snap(self, freq='S'): # Superdumb, 
punting on any optimizing freq = to_offset(freq) - snapped = np.empty(len(self), dtype='M8[us]') + snapped = np.empty(len(self), dtype='M8[ns]') for i, v in enumerate(self): s = v @@ -565,7 +572,7 @@ def snap(self, freq='S'): s = t0 else: s = t1 - snapped[i] = np.datetime64(s) + snapped[i] = s # we know it conforms; skip check return DatetimeIndex(snapped, freq=freq, verify_integrity=False) @@ -633,6 +640,12 @@ def union(self, other): ------- y : Index or DatetimeIndex """ + if not isinstance(other, DatetimeIndex): + try: + other = DatetimeIndex(other) + except TypeError: + pass + this, other = self._maybe_utc_convert(other) if this._can_fast_union(other): @@ -879,8 +892,8 @@ def _indices_at_time(self, key): # TODO: time object with tzinfo? - mus = _time_to_microsecond(key) - indexer = lib.values_at_time(self.asi8, mus) + nanos = _time_to_nanosecond(key) + indexer = lib.values_at_time(self.asi8, nanos) return com._ensure_platform_int(indexer) def _get_string_slice(self, key): @@ -990,7 +1003,7 @@ def __iter__(self): def searchsorted(self, key, side='left'): if isinstance(key, np.ndarray): - key = np.array(key, dtype='M8[us]', copy=False) + key = np.array(key, dtype='M8[ns]', copy=False) else: key = _to_m8(key) @@ -1015,7 +1028,7 @@ def _constructor(self): @property def dtype(self): - return np.dtype('M8') + return np.dtype('M8[ns]') @property def is_all_dates(self): @@ -1107,7 +1120,7 @@ def tz_localize(self, tz): # Convert to UTC new_dates = lib.tz_convert(self.asi8, tz, _utc()) - new_dates = new_dates.view('M8[us]') + new_dates = new_dates.view('M8[ns]') return self._simple_new(new_dates, self.name, self.offset, tz) def tz_validate(self): @@ -1138,7 +1151,7 @@ def _generate_regular_range(start, end, periods, offset): raise ValueError('Must specify two of start, end, or periods') if isinstance(offset, Tick): - stride = offset.micros + stride = offset.nanos if periods is None: b = Timestamp(start).value e = Timestamp(end).value @@ -1153,12 +1166,12 @@ def _generate_regular_range(start, end, periods, offset): raise NotImplementedError data = np.arange(b, e, stride, dtype=np.int64) - data = data.view('M8[us]') + data = data.view('M8[ns]') else: xdr = generate_range(start=start, end=end, periods=periods, offset=offset) - data = np.array(list(xdr), dtype='M8[us]') + data = np.array(list(xdr), dtype='M8[ns]') return data @@ -1247,7 +1260,7 @@ def parser(x): p_ufunc = np.frompyfunc(parser, 1, 1) data = p_ufunc(arr) - return np.array(data, dtype='M8[us]') + return np.array(data, dtype='M8[ns]') _CACHE_START = Timestamp(datetime(1950, 1, 1)) @@ -1265,6 +1278,6 @@ def _naive_in_cache_range(start, end): def _in_range(start, end, rng_start, rng_end): return start > rng_start and end < rng_end -def _time_to_microsecond(time): +def _time_to_nanosecond(time): seconds = time.hour * 60 * 60 + 60 * time.minute + time.second - return 1000000 * seconds + time.microsecond + return (1000000 * seconds + time.microsecond) * 1000 diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 98716ed1f57d4..e9c2628f6c30c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -963,8 +963,8 @@ def delta(self): return self._delta @property - def micros(self): - return _delta_to_microseconds(self.delta) + def nanos(self): + return _delta_to_nanoseconds(self.delta) def apply(self, other): if isinstance(other, (datetime, timedelta)): @@ -990,18 +990,18 @@ def _delta_to_tick(delta): else: return Second(seconds) else: - mus = _delta_to_microseconds(delta) + mus = _delta_to_nanoseconds(delta) 
if mus % 1000 == 0: return Milli(mus // 1000) else: return Micro(mus) -def _delta_to_microseconds(delta): +def _delta_to_nanoseconds(delta): if isinstance(delta, Tick): delta = delta.delta return (delta.days * 24 * 60 * 60 * 1000000 + delta.seconds * 1000000 - + delta.microseconds) + + delta.microseconds) * 1000 class Day(Tick, CacheableOffset): _inc = timedelta(1) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 97025eafa5dc3..f1109dd52f395 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -237,7 +237,7 @@ def _make_period_bins(axis, freq, begin=None, end=None, def _get_range_edges(axis, begin, end, offset, closed='left', base=0): - from pandas.tseries.offsets import Tick, _delta_to_microseconds + from pandas.tseries.offsets import Tick, _delta_to_nanoseconds if isinstance(offset, basestring): offset = to_offset(offset) @@ -245,9 +245,9 @@ def _get_range_edges(axis, begin, end, offset, closed='left', raise ValueError("Rule not a recognized offset") if isinstance(offset, Tick): - day_micros = _delta_to_microseconds(timedelta(1)) + day_nanos = _delta_to_nanoseconds(timedelta(1)) # #1165 - if ((day_micros % offset.micros) == 0 and begin is None + if ((day_nanos % offset.nanos) == 0 and begin is None and end is None): return _adjust_dates_anchored(axis[0], axis[-1], offset, closed=closed, base=base) @@ -271,26 +271,26 @@ def _get_range_edges(axis, begin, end, offset, closed='left', def _adjust_dates_anchored(first, last, offset, closed='right', base=0): from pandas.tseries.tools import normalize_date - start_day_micros = Timestamp(normalize_date(first)).value - last_day_micros = Timestamp(normalize_date(last)).value + start_day_nanos = Timestamp(normalize_date(first)).value + last_day_nanos = Timestamp(normalize_date(last)).value - base_micros = (base % offset.n) * offset.micros // offset.n - start_day_micros += base_micros - last_day_micros += base_micros + base_nanos = (base % offset.n) * offset.nanos // offset.n + start_day_nanos += base_nanos + last_day_nanos += base_nanos - foffset = (first.value - start_day_micros) % offset.micros - loffset = (last.value - last_day_micros) % offset.micros + foffset = (first.value - start_day_nanos) % offset.nanos + loffset = (last.value - last_day_nanos) % offset.nanos if closed == 'right': if foffset > 0: # roll back fresult = first.value - foffset else: - fresult = first.value - offset.micros + fresult = first.value - offset.nanos if loffset > 0: # roll forward - lresult = last.value + (offset.micros - loffset) + lresult = last.value + (offset.nanos - loffset) else: # already the end of the road lresult = last.value @@ -303,9 +303,9 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): if loffset > 0: # roll forward - lresult = last.value + (offset.micros - loffset) + lresult = last.value + (offset.nanos - loffset) else: - lresult = last.value + offset.micros + lresult = last.value + offset.nanos return Timestamp(fresult), Timestamp(lresult) @@ -361,11 +361,11 @@ def values_at_time(obj, time, tz=None, asof=False): # TODO: time object with tzinfo? 
- mus = _time_to_microsecond(time) + mus = _time_to_nanosecond(time) indexer = lib.values_at_time(obj.index.asi8, mus) indexer = com._ensure_platform_int(indexer) return obj.take(indexer) -def _time_to_microsecond(time): +def _time_to_nanosecond(time): seconds = time.hour * 60 * 60 + 60 * time.minute + time.second - return 1000000 * seconds + time.microsecond + return 1000000000L * seconds + time.microsecond * 1000 diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index c6f5c39cdda7c..5fae73c723aea 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -54,7 +54,7 @@ def test_is_unique_monotonic(self): def test_index_unique(self): uniques = self.dups.index.unique() - self.assert_(uniques.dtype == 'M8') # sanity + self.assert_(uniques.dtype == 'M8[ns]') # sanity def test_duplicate_dates_indexing(self): ts = self.dups @@ -310,7 +310,7 @@ def test_frame_ctor_datetime64_column(self): dates = np.asarray(rng) df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assert_(np.issubdtype(df['B'].dtype, np.datetime64)) + self.assert_(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) def test_frame_add_datetime64_column(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', @@ -318,7 +318,7 @@ def test_frame_add_datetime64_column(self): df = DataFrame(index=np.arange(len(rng))) df['A'] = rng - self.assert_(np.issubdtype(df['A'].dtype, np.datetime64)) + self.assert_(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) def test_series_ctor_datetime64(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', @@ -326,14 +326,14 @@ def test_series_ctor_datetime64(self): dates = np.asarray(rng) series = Series(dates) - self.assert_(np.issubdtype(series.dtype, np.datetime64)) + self.assert_(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) def test_reindex_series_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') series = Series(rng) result = series.reindex(range(15)) - self.assert_(np.issubdtype(result.dtype, np.datetime64)) + self.assert_(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) mask = result.isnull() self.assert_(mask[-5:].all()) @@ -344,14 +344,14 @@ def test_reindex_frame_add_nat(self): df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) result = df.reindex(range(15)) - self.assert_(np.issubdtype(result['B'].dtype, np.datetime64)) + self.assert_(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) mask = com.isnull(result)['B'] self.assert_(mask[-5:].all()) self.assert_(not mask[:-5].any()) def test_series_repr_nat(self): - series = Series([0, 1, 2, NaT], dtype='M8[us]') + series = Series([0, 1, 2, NaT], dtype='M8[ns]') result = repr(series) expected = ('0 1970-01-01 00:00:00\n' @@ -361,20 +361,20 @@ def test_series_repr_nat(self): self.assertEquals(result, expected) def test_fillna_nat(self): - series = Series([0, 1, 2, NaT], dtype='M8[us]') + series = Series([0, 1, 2, NaT], dtype='M8[ns]') filled = series.fillna(method='pad') - filled2 = series.fillna(value=series[2]) + filled2 = series.fillna(value=series.values[2]) expected = series.copy() - expected[3] = expected[2] + expected.values[3] = expected.values[2] assert_series_equal(filled, expected) assert_series_equal(filled2, expected) df = DataFrame({'A': series}) filled = df.fillna(method='pad') - filled2 = df.fillna(value=series[2]) + filled2 = df.fillna(value=series.values[2]) expected = DataFrame({'A': expected}) assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) 
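A minimal sketch of the NaT padding behaviour that the ``test_fillna_nat`` hunk above exercises, written against modern spellings (``pd.to_datetime(..., unit='ns')`` and ``Series.ffill`` are assumptions on the editor's part, not the API introduced by this patch)::

    import pandas as pd

    # integers are read as nanoseconds since the epoch; None becomes NaT
    s = pd.Series(pd.to_datetime([0, 1, 2, None], unit='ns'))
    s.isna()    # last entry is NaT, the datetime64[ns] missing-value marker
    s.ffill()   # NaT takes the previous nanosecond timestamp, mirroring
                # the fillna(method='pad') assertion in the test above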
@@ -387,7 +387,7 @@ def test_string_na_nat_conversion(self): strings = np.array(['1/1/2000', '1/2/2000', np.nan, '1/4/2000, 12:34:56'], dtype=object) - expected = np.empty(4, dtype='M8') + expected = np.empty(4, dtype='M8[ns]') for i, val in enumerate(strings): if com.isnull(val): expected[i] = NaT @@ -417,7 +417,7 @@ def test_string_na_nat_conversion(self): result = to_datetime(series) dresult = to_datetime(dseries) - expected = Series(np.empty(5, dtype='M8[us]'), index=idx) + expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): x = series[i] if isnull(x): @@ -659,6 +659,22 @@ def test_datetimeindex_integers_shift(self): expected = rng.shift(-5) self.assert_(result.equals(expected)) + def test_astype_object(self): + # NumPy 1.6.1 weak ns support + rng = date_range('1/1/2000', periods=20) + + casted = rng.astype('O') + exp_values = list(rng) + + self.assert_(np.array_equal(casted, exp_values)) + + + def test_catch_infinite_loop(self): + offset = datetools.DateOffset(minute=5) + # blow up, don't loop forever + self.assertRaises(Exception, date_range, datetime(2011,11,11), + datetime(2011,11,12), freq=offset) + def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) @@ -881,7 +897,7 @@ def test_date_range_normalize(self): offset = timedelta(2) values = np.array([snap + i * offset for i in range(n)], - dtype='M8[us]') + dtype='M8[ns]') self.assert_(np.array_equal(rng, values)) @@ -982,8 +998,7 @@ def setUp(self): self.series = Series(rand(len(dti)), dti) def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='Q-JAN', start=datetime(1997,12,31), - periods=100) + dti = DatetimeIndex(freq='Q-JAN', start=datetime(1997,12,31), periods=100) self.assertEquals(dti.year[0], 1998) self.assertEquals(dti.month[0], 1) @@ -1069,11 +1084,11 @@ def test_datetimeindex_constructor(self): idx4 = DatetimeIndex(arr) arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', - '2005-01-04'], dtype='M8[us]') + '2005-01-04'], dtype='M8[ns]') idx5 = DatetimeIndex(arr) arr = np.array(['1/1/2005', '1/2/2005', 'Jan 3, 2005', - '2005-01-04'], dtype='M8[us]') + '2005-01-04'], dtype='M8[ns]') idx6 = DatetimeIndex(arr) for other in [idx2, idx3, idx4, idx5, idx6]: @@ -1116,7 +1131,7 @@ def test_dti_reset_index_round_trip(self): dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') d1 = DataFrame({'v' : np.random.rand(len(dti))}, index=dti) d2 = d1.reset_index() - self.assert_(d2.dtypes[0] == np.datetime64) + self.assert_(d2.dtypes[0] == np.dtype('M8[ns]')) d3 = d2.set_index('index') assert_frame_equal(d1, d3) @@ -1134,6 +1149,27 @@ def test_datetimeindex_union_join_empty(self): # TODO: test merge & concat with datetime64 block +class TestTimestamp(unittest.TestCase): + + def test_basics_nanos(self): + arr = np.array(['1/1/2000'], dtype='M8[ns]') + stamp = Timestamp(arr[0].view('i8') + 500) + self.assert_(stamp.year == 2000) + self.assert_(stamp.month == 1) + self.assert_(stamp.microsecond == 0) + self.assert_(stamp.nanosecond == 500) + + def test_comparison(self): + arr = np.array(['1/1/2000'], dtype='M8[ns]') + + x = Timestamp(arr[0].view('i8') + 500) + y = Timestamp(arr[0].view('i8')) + + self.assert_(arr[0].astype('O') == x) + self.assert_(x != y) + +""" + class TestNewOffsets(unittest.TestCase): def test_yearoffset(self): @@ -1326,13 +1362,7 @@ def test_dayofmonthoffset(self): self.assert_(t.weekday() == day) - def test_catch_infinite_loop(self): - offset = datetools.DateOffset(minute=5) - # blow up, don't loop forever - self.assertRaises(Exception, date_range, 
datetime(2011,11,11), - datetime(2011,11,12), freq=offset) - - +""" if __name__ == '__main__': nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py index 3c068e743697c..18cd71fb45ff8 100644 --- a/vb_suite/sparse.py +++ b/vb_suite/sparse.py @@ -14,7 +14,7 @@ rng = np.asarray(DateRange('1/1/2000', periods=N, offset=datetools.Minute())) -# rng2 = np.asarray(rng).astype('M8[us]').astype('i8') +# rng2 = np.asarray(rng).astype('M8[ns]').astype('i8') series = {} for i in range(1, K + 1): From 4f15d542578565b532c6bda88fbc624e8f369d03 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 15 May 2012 18:35:16 -0400 Subject: [PATCH 099/114] BUG: more nano fixes --- pandas/src/engines.pyx | 4 ++-- pandas/tseries/offsets.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index 5c16ebb5fddc7..d6d20aabf9bc9 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -436,11 +436,11 @@ cdef class DatetimeEngine(Int64Engine): cdef inline _to_i8(object val): + cdef pandas_datetimestruct dts if util.is_datetime64_object(val): val = unbox_datetime64_scalar(val) elif PyDateTime_Check(val): - val = np.datetime64(val) - val = unbox_datetime64_scalar(val) + return _pydatetime_to_dts(val, &dts) return val # ctypedef fused idxvalue_t: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index e9c2628f6c30c..46d22700fffb2 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1007,6 +1007,10 @@ class Day(Tick, CacheableOffset): _inc = timedelta(1) _rule_base = 'D' + def isAnchored(self): + + return False + class Hour(Tick): _inc = timedelta(0, 3600) _rule_base = 'H' From 9bc381470b2d8e9f78f7d4b5734f437871c31dc0 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 19 May 2012 12:28:10 -0400 Subject: [PATCH 100/114] REF: more nanosecond support fixes, test suite passes #1238 --- pandas/core/algorithms.py | 8 ++++ pandas/core/common.py | 14 +++---- pandas/core/factor.py | 34 ++++++----------- pandas/core/format.py | 26 ++++++++++--- pandas/core/index.py | 20 ++++++++-- pandas/core/nanops.py | 2 +- pandas/io/pytables.py | 7 ++-- pandas/io/tests/test_parsers.py | 3 +- pandas/sparse/frame.py | 17 +++++++++ pandas/src/datetime.pyx | 49 +++++++++++++++++++++---- pandas/src/inference.pyx | 7 +--- pandas/src/reduce.pyx | 3 ++ pandas/tests/test_frame.py | 3 +- pandas/tests/test_series.py | 2 +- pandas/tseries/frequencies.py | 4 +- pandas/tseries/index.py | 3 +- pandas/tseries/offsets.py | 14 +++++-- pandas/tseries/period.py | 5 ++- pandas/tseries/tests/test_resample.py | 2 +- pandas/tseries/tests/test_timeseries.py | 36 ++++++++++++++---- 20 files changed, 180 insertions(+), 79 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 44673249dfd4c..d46a199a2baea 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -108,6 +108,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1): Returns ------- """ + values = np.asarray(values) + is_datetime = com.is_datetime64_dtype(values) hash_klass, values = _get_data_algo(values, _hashtables) uniques = [] @@ -129,6 +131,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1): uniques = uniques.take(sorter) counts = counts.take(sorter) + if is_datetime: + uniques = np.array(uniques, dtype='M8[ns]') + return labels, uniques, counts def value_counts(values, sort=True, ascending=False): @@ -179,6 +184,9 @@ def _get_data_algo(values, 
func_map): if com.is_float_dtype(values): f = func_map['float64'] values = com._ensure_float64(values) + elif com.is_datetime64_dtype(values): + f = func_map['int64'] + values = values.view('i8') elif com.is_integer_dtype(values): f = func_map['int64'] values = com._ensure_int64(values) diff --git a/pandas/core/common.py b/pandas/core/common.py index 2da212cbd3bfc..f8418788b7c40 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -171,7 +171,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan): 'int64' : _algos.take_1d_int64, 'object' : _algos.take_1d_object, 'bool' : _view_wrapper(_algos.take_1d_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(_algos.take_1d_int64, np.int64, + 'datetime64[ns]' : _view_wrapper(_algos.take_1d_int64, np.int64, na_override=lib.NaT), } @@ -181,7 +181,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan): 'int64' : _algos.take_2d_axis0_int64, 'object' : _algos.take_2d_axis0_object, 'bool' : _view_wrapper(_algos.take_2d_axis0_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(_algos.take_2d_axis0_int64, np.int64, + 'datetime64[ns]' : _view_wrapper(_algos.take_2d_axis0_int64, np.int64, na_override=lib.NaT), } @@ -191,7 +191,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan): 'int64' : _algos.take_2d_axis1_int64, 'object' : _algos.take_2d_axis1_object, 'bool' : _view_wrapper(_algos.take_2d_axis1_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(_algos.take_2d_axis1_int64, np.int64, + 'datetime64[ns]' : _view_wrapper(_algos.take_2d_axis1_int64, np.int64, na_override=lib.NaT), } @@ -201,7 +201,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan): 'int64' : _algos.take_2d_multi_int64, 'object' : _algos.take_2d_multi_object, 'bool' : _view_wrapper(_algos.take_2d_multi_bool, np.uint8), - 'datetime64[us]' : _view_wrapper(_algos.take_2d_multi_int64, np.int64, + 'datetime64[ns]' : _view_wrapper(_algos.take_2d_multi_int64, np.int64, na_override=lib.NaT), } @@ -246,7 +246,7 @@ def take_1d(arr, indexer, out=None, fill_value=np.nan): out.dtype) out = _maybe_upcast(out) np.putmask(out, mask, fill_value) - elif dtype_str in ('float64', 'object', 'datetime64[us]'): + elif dtype_str in ('float64', 'object', 'datetime64[ns]'): if out is None: out = np.empty(n, dtype=arr.dtype) take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value) @@ -284,7 +284,7 @@ def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan): _ensure_int64(col_idx), out=out, fill_value=fill_value) return out - elif dtype_str in ('float64', 'object', 'datetime64[us]'): + elif dtype_str in ('float64', 'object', 'datetime64[ns]'): out = np.empty(out_shape, dtype=arr.dtype) take_f(arr, _ensure_int64(row_idx), _ensure_int64(col_idx), out=out, fill_value=fill_value) @@ -326,7 +326,7 @@ def take_2d(arr, indexer, out=None, mask=None, needs_masking=None, axis=0, take_f = _get_take2d_function(dtype_str, axis=axis) take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value) return out - elif dtype_str in ('float64', 'object', 'datetime64[us]'): + elif dtype_str in ('float64', 'object', 'datetime64[ns]'): if out is None: out = np.empty(out_shape, dtype=arr.dtype) take_f = _get_take2d_function(dtype_str, axis=axis) diff --git a/pandas/core/factor.py b/pandas/core/factor.py index 650ff033f79c9..6bc45924a08f2 100644 --- a/pandas/core/factor.py +++ b/pandas/core/factor.py @@ -18,11 +18,17 @@ class Factor(np.ndarray): * levels : ndarray """ def __new__(cls, data): - data = np.asarray(data, dtype=object) - levels, factor = unique_with_labels(data) - factor = factor.view(Factor) - 
factor.levels = levels - return factor + from pandas.core.index import _ensure_index + from pandas.core.algorithms import factorize + + try: + labels, levels, _ = factorize(data, sort=True) + except TypeError: + labels, levels, _ = factorize(data, sort=False) + + labels = labels.view(Factor) + labels.levels = _ensure_index(levels) + return labels levels = None @@ -51,21 +57,3 @@ def __getitem__(self, key): else: return np.ndarray.__getitem__(self, key) - -def unique_with_labels(values): - from pandas.core.index import Index - rizer = lib.Factorizer(len(values)) - labels, _ = rizer.factorize(values, sort=False) - uniques = Index(rizer.uniques) - labels = com._ensure_platform_int(labels) - try: - sorter = uniques.argsort() - reverse_indexer = np.empty(len(sorter), dtype=np.int_) - reverse_indexer.put(sorter, np.arange(len(sorter))) - labels = reverse_indexer.take(labels) - uniques = uniques.take(sorter) - except TypeError: - pass - - return uniques, labels - diff --git a/pandas/core/format.py b/pandas/core/format.py index 6ae204b944d3a..c22e2df221831 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -571,16 +571,30 @@ def get_result(self): if self.formatter: formatter = self.formatter else: - def formatter(x): - if isnull(x): - return 'NaT' - else: - return str(x) + formatter = _format_datetime64 fmt_values = [formatter(x) for x in self.values] - return _make_fixed_width(fmt_values, self.justify) +def _format_datetime64(x): + if isnull(x): + return 'NaT' + + stamp = lib.Timestamp(x) + base = stamp.strftime('%Y-%m-%d %H:%M:%S') + + fraction = stamp.microsecond * 1000 + stamp.nanosecond + digits = 9 + + if fraction == 0: + return base + + while (fraction % 10) == 0: + fraction /= 10 + digits -= 1 + + return base + ('.%%.%id' % digits) % fraction + def _make_fixed_width(strings, justify='right'): if len(strings) == 0: diff --git a/pandas/core/index.py b/pandas/core/index.py index dee1764728b92..0b10fbbbd9a89 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -691,8 +691,8 @@ def get_indexer(self, target, method=None, limit=None): return pself.get_indexer(ptarget, method=method, limit=limit) if self.dtype != target.dtype: - this = Index(self, dtype=object) - target = Index(target, dtype=object) + this = self.astype(object) + target = target.astype(object) return this.get_indexer(target, method=method, limit=limit) if not self.is_unique: @@ -1172,8 +1172,12 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None): levels = [_ensure_index(lev) for lev in levels] labels = [np.asarray(labs, dtype=np.int_) for labs in labels] - values = [ndtake(np.asarray(lev), lab) + values = [ndtake(lev.values, lab) for lev, lab in zip(levels, labels)] + + # Need to box timestamps, etc. 
+ values = _clean_arrays(values) + subarr = lib.fast_zip(values).view(cls) subarr.levels = levels @@ -2372,3 +2376,13 @@ def _maybe_box_dtindex(idx): return Index(_dt_box_array(idx.asi8), dtype='object') return idx +def _clean_arrays(values): + result = [] + for arr in values: + if np.issubdtype(arr.dtype, np.datetime_): + result.append(lib.map_infer(arr, lib.Timestamp)) + else: + result.append(arr) + return result + + diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e742bdb55379a..ad65a589cddfe 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -405,7 +405,7 @@ def unique1d(values): uniques = np.array(table.unique(com._ensure_int64(values)), dtype=np.int64) - if values.dtype == np.datetime64: + if issubclass(values.dtype.type, np.datetime_): uniques = uniques.view('M8[ns]') else: table = lib.PyObjectHashTable(len(values)) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 7ac5ad901b548..b8724e854c7ba 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -839,8 +839,7 @@ def _read_panel_table(self, group, where=None): columns = _maybe_convert(sel.values['column'], table._v_attrs.columns_kind) - index = _maybe_convert(sel.values['index'], - table._v_attrs.index_kind) + index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind) values = sel.values['values'] major = Factor(index) @@ -995,7 +994,7 @@ def _maybe_convert(values, val_kind): def _get_converter(kind): if kind == 'datetime64': - return lambda x: np.datetime64(x) + return lambda x: np.array(x, dtype='M8[ns]') if kind == 'datetime': return lib.convert_timestamps else: # pragma: no cover @@ -1069,7 +1068,7 @@ def generate(self, where): field = c['field'] if field == 'index' and self.index_kind == 'datetime64': - val = np.datetime64(value).view('i8') + val = lib.Timestamp(value).value self.conditions.append('(%s %s %s)' % (field,op,val)) elif field == 'index' and isinstance(value, datetime): value = time.mktime(value.timetuple()) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 5fccc5a39c47a..f07e95cb2ffb3 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -376,7 +376,8 @@ def test_parse_dates_column_list(self): lev = expected.index.levels[0] expected.index.levels[0] = lev.to_datetime(dayfirst=True) expected['aux_date'] = to_datetime(expected['aux_date'], - dayfirst=True).astype('O') + dayfirst=True) + expected['aux_date'] = map(Timestamp, expected['aux_date']) self.assert_(isinstance(expected['aux_date'][0], datetime)) df = read_csv(StringIO(data), sep=";", index_col = range(4), diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 9291d90765377..673d759de2f10 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -741,6 +741,23 @@ def apply(self, func, axis=0, broadcast=False): else: return self._apply_broadcast(func, axis) + def applymap(self, func): + """ + Apply a function to a DataFrame that is intended to operate + elementwise, i.e. 
like doing map(func, series) for each series in the + DataFrame + + Parameters + ---------- + func : function + Python function, returns a single value from a single value + + Returns + ------- + applied : DataFrame + """ + return self.apply(lambda x: map(func, x)) + @Appender(DataFrame.fillna.__doc__) def fillna(self, value=None, method='pad', inplace=False, limit=None): new_series = {} diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index f623376bd77a6..a73a71f76c13e 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -136,6 +136,11 @@ class Timestamp(_Timestamp): conv = tz.normalize(self) return Timestamp(conv) + def replace(self, **kwds): + return Timestamp(datetime.replace(self, **kwds), + offset=self.offset) + + cdef inline bint is_timestamp(object o): return isinstance(o, Timestamp) @@ -194,10 +199,38 @@ def apply_offset(ndarray[object] values, object offset): # (see Timestamp class above). This will serve as a C extension type that # shadows the python class, where we do any heavy lifting. cdef class _Timestamp(datetime): - cdef public: + cdef readonly: int64_t value, nanosecond object offset # frequency reference + def __richcmp__(_Timestamp self, object other, int op): + cdef _Timestamp ots + + if isinstance(other, _Timestamp): + ots = other + elif isinstance(other, datetime): + ots = Timestamp(other) + else: + if op == 2: + return False + elif op == 3: + return True + else: + raise TypeError('Cannot compare Timestamp with %s' % str(other)) + + if op == 2: # == + return self.value == ots.value + elif op == 3: # != + return self.value != ots.value + elif op == 0: # < + return self.value < ots.value + elif op == 1: # <= + return self.value <= ots.value + elif op == 4: # > + return self.value > ots.value + elif op == 5: # >= + return self.value >= ots.value + def __add__(self, other): if is_integer_object(other): if self.offset is None: @@ -313,6 +346,7 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.min = PyDateTime_DATE_GET_MINUTE(val) dts.sec = PyDateTime_DATE_GET_SECOND(val) dts.us = PyDateTime_DATE_GET_MICROSECOND(val) + dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _dtlike_to_datetime64(object val, @@ -324,6 +358,7 @@ cdef inline int64_t _dtlike_to_datetime64(object val, dts.min = val.minute dts.sec = val.second dts.us = val.microsecond + dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _date_to_datetime64(object val, @@ -331,10 +366,8 @@ cdef inline int64_t _date_to_datetime64(object val, dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) dts.day = PyDateTime_GET_DAY(val) - dts.hour = 0 - dts.min = 0 - dts.sec = 0 - dts.us = 0 + dts.hour = dts.min = dts.sec = dts.us = 0 + dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) @@ -928,7 +961,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): arr = np.empty(sz, dtype='i8') for i in range(sz): - arr[i] = int(total_seconds(transinfo[i][0])) * 1000000 + arr[i] = int(total_seconds(transinfo[i][0])) * 1000000000 return arr @@ -1243,7 +1276,7 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq): for i in range(l): pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) out[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, freq) + dts.hour, dts.min, dts.sec, freq) return out def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): @@ -1338,7 +1371,7 @@ 
cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): dts.hour = dinfo.hour dts.min = dinfo.minute dts.sec = int(dinfo.second) - dts.us = 0 + dts.us = dts.ps = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 6c88d293106ab..63e6776abaa22 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -491,15 +491,13 @@ def map_infer(ndarray arr, object f): ''' cdef: Py_ssize_t i, n - flatiter it ndarray[object] result object val - it = PyArray_IterNew(arr) n = len(arr) result = np.empty(n, dtype=object) for i in range(n): - val = f(PyArray_GETITEM(arr, PyArray_ITER_DATA(it))) + val = f(util.get_value_at(arr, i)) # unbox 0-dim arrays, GH #690 if is_array(val) and PyArray_NDIM(val) == 0: @@ -508,9 +506,6 @@ def map_infer(ndarray arr, object f): result[i] = val - - PyArray_ITER_NEXT(it) - return maybe_convert_objects(result, try_float=0) def to_object_array(list rows): diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index 2a956c53f2488..49cdddb4b7740 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -85,11 +85,14 @@ cdef class Reducer: except Exception, e: if hasattr(e, 'args'): e.args = e.args + (i,) + raise finally: # so we don't free the wrong memory chunk.data = dummy_buf + if result.dtype == np.object_: result = maybe_convert_objects(result) + return result def _get_result_array(self, object res): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 90c5e8f3e3565..5310a4b0d7532 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1629,7 +1629,7 @@ def test_constructor_maskedarray_nonfloat(self): self.assertEqual(2, frame['C'][2]) # masked np.datetime64 stays (use lib.NaT as null) - mat = ma.masked_all((2, 3), dtype=np.datetime64) + mat = ma.masked_all((2, 3), dtype='M8[ns]') # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) @@ -5683,7 +5683,6 @@ def test_index_namedtuple(self): idx2 = IndexType("baz", "bof") index = Index([idx1, idx2], name="composite_index") df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) - print df.ix[IndexType("foo", "bar")]["A"] self.assertEqual(df.ix[IndexType("foo", "bar")]["A"], 1) def test_bool_raises_value_error_1069(self): diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 1ae6a3edcdf79..dae660171dc54 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1359,7 +1359,7 @@ def test_comparison_different_length(self): self.assertRaises(ValueError, a.__lt__, b) def test_between(self): - s = Series(bdate_range('1/1/2000', periods=20), dtype=object) + s = Series(bdate_range('1/1/2000', periods=20).asobject) s[::2] = np.nan result = s[s.between(s[3], s[17])] diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 4501e1d6a6257..6eb6e94872fee 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -753,10 +753,10 @@ def get_freq(self): return _maybe_add_count('L', delta / _ONE_MILLI) elif _is_multiple(delta, _ONE_MICRO): # Microseconds - return _maybe_add_count('L', delta / _ONE_MICRO) + return _maybe_add_count('U', delta / _ONE_MICRO) else: # Nanoseconds - return _maybe_add_count('U', delta) + return _maybe_add_count('N', delta) @cache_readonly def day_deltas(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 4b3e639907b81..051477fa7027b 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1229,8 +1229,7 @@ def 
_dt_box_array(arr, offset=None, tz=None): return arr boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz) - boxer = np.frompyfunc(boxfunc, 1, 1) - return boxer(arr) + return lib.map_infer(arr, boxfunc) def _to_m8(key): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 46d22700fffb2..3db105db4f0c9 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -990,11 +990,13 @@ def _delta_to_tick(delta): else: return Second(seconds) else: - mus = _delta_to_nanoseconds(delta) - if mus % 1000 == 0: - return Milli(mus // 1000) + nanos = _delta_to_nanoseconds(delta) + if nanos % 1000000 == 0: + return Milli(nanos // 1000000) + elif nanos % 1000 == 0: + return Micro(nanos // 1000) else: - return Micro(mus) + return Nano(nanos) def _delta_to_nanoseconds(delta): if isinstance(delta, Tick): @@ -1030,6 +1032,10 @@ class Micro(Tick): _inc = timedelta(microseconds=1) _rule_base = 'U' +class Nano(Tick): + _inc = 1 + _rule_base = 'N' + BDay = BusinessDay BMonthEnd = BusinessMonthEnd BMonthBegin = BusinessMonthBegin diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index a662c35396448..5cae2375cf54a 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -466,6 +466,9 @@ def _period_box_array(arr, freq): return boxer(arr) def dt64arr_to_periodarr(data, freq): + if data.dtype != np.dtype('M8[ns]'): + raise ValueError('Wrong dtype: %s' % data.dtype) + if data is None: return data @@ -607,7 +610,7 @@ def __new__(cls, data=None, raise ValueError(('freq not specified and cannot be ' 'inferred from first element')) - if data.dtype == np.datetime64: + if issubclass(data.dtype.type, np.datetime_): data = dt64arr_to_periodarr(data, freq) elif data.dtype == np.int64: pass diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 875b5c94fa2e1..ce568f5a98162 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -54,7 +54,7 @@ def test_custom_grouper(self): # construct expected val arr = [1] + [5] * 2592 idx = dti[0:-1:5] - idx = idx.append(DatetimeIndex([np.datetime64(dti[-1])])) + idx = idx.append(dti[-1:]) expect = Series(arr, index=idx) # cython returns float for now diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 5fae73c723aea..e8f78eead6598 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -351,7 +351,7 @@ def test_reindex_frame_add_nat(self): self.assert_(not mask[:-5].any()) def test_series_repr_nat(self): - series = Series([0, 1, 2, NaT], dtype='M8[ns]') + series = Series([0, 1000, 2000, NaT], dtype='M8[ns]') result = repr(series) expected = ('0 1970-01-01 00:00:00\n' @@ -1160,13 +1160,35 @@ def test_basics_nanos(self): self.assert_(stamp.nanosecond == 500) def test_comparison(self): - arr = np.array(['1/1/2000'], dtype='M8[ns]') - - x = Timestamp(arr[0].view('i8') + 500) - y = Timestamp(arr[0].view('i8')) + # 5-18-2012 00:00:00.000 + stamp = 1337299200000000000L + + val = Timestamp(stamp) + + self.assert_(val == val) + self.assert_(not val != val) + self.assert_(not val < val) + self.assert_(val <= val) + self.assert_(not val > val) + self.assert_(val >= val) + + other = datetime(2012, 5, 18) + self.assert_(val == other) + self.assert_(not val != other) + self.assert_(not val < other) + self.assert_(val <= other) + self.assert_(not val > other) + self.assert_(val >= other) + + other = Timestamp(stamp + 100) + + self.assert_(not val == other) + 
self.assert_(val != other) + self.assert_(val < other) + self.assert_(val <= other) + self.assert_(other > val) + self.assert_(other >= val) - self.assert_(arr[0].astype('O') == x) - self.assert_(x != y) """ From b0265668c63b92612d28adf8f79180e7436a8e04 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 19 May 2012 13:03:54 -0400 Subject: [PATCH 101/114] ENH: more nanosecond support #1238 --- doc/source/io.rst | 2 +- pandas/core/generic.py | 2 +- pandas/src/datetime.pyx | 21 ++++++++++++++++++++- pandas/tseries/offsets.py | 17 +++++++---------- pandas/tseries/tests/test_timeseries.py | 4 ++++ 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 98a69ba504e87..caa2a8a80d6ae 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -59,7 +59,7 @@ The two workhorse functions for reading text files (a.k.a. flat files) are They both use the same parsing code to intelligently convert tabular data into a DataFrame object. They can take a number of arguments: - - ``path_or_buffer``: Either a string path to a file, or any object with a + - ``filepath_or_buffer``: Either a string path to a file, or any object with a ``read`` method (such as an open file or ``StringIO``). - ``sep`` or ``delimiter``: A delimiter / separator to split fields on. `read_csv` is capable of inferring the delimiter automatically in some diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 41b293c17461e..1ce05f852dedd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -215,7 +215,7 @@ def first(self, offset): end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks - if not offset.isAnchored() and hasattr(offset, 'delta'): + if not offset.isAnchored() and hasattr(offset, '_inc'): if end_date in self.index: end = self.index.searchsorted(end_date, side='left') diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index a73a71f76c13e..93e1ced2d2e64 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -9,6 +9,7 @@ from cpython cimport * from datetime cimport * from util cimport is_integer_object, is_datetime64_object +from datetime import timedelta from dateutil.parser import parse as parse_date cimport util @@ -240,7 +241,15 @@ cdef class _Timestamp(datetime): else: return Timestamp((self.offset.__mul__(other)).apply(self)) else: - return datetime.__add__(self, other) + if isinstance(other, timedelta) or hasattr(other, 'delta'): + nanos = _delta_to_nanoseconds(other) + return Timestamp(self.value + nanos) + else: + result = datetime.__add__(self, other) + if isinstance(result, datetime): + result = Timestamp(result) + result.nanosecond = self.nanosecond + return result def __sub__(self, other): if is_integer_object(other): @@ -253,6 +262,16 @@ cdef class _Timestamp(datetime): field) return out[0] +def _delta_to_nanoseconds(delta): + try: + delta = delta.delta + except: + pass + return (delta.days * 24 * 60 * 60 * 1000000 + + delta.seconds * 1000000 + + delta.microseconds) * 1000 + + # lightweight C object to hold datetime & int64 pair cdef class _TSObject: cdef: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 3db105db4f0c9..fe268003c1109 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -4,6 +4,7 @@ from pandas.core.common import _count_not_none from pandas.tseries.tools import to_datetime +from pandas.util.decorators import cache_readonly # import after tools, dateutil check from dateutil.relativedelta import relativedelta @@ -408,7 +409,7 @@ def 
__init__(self, n=1, **kwds): raise Exception('Day must be 0<=day<=6, got %d' % self.weekday) - self.delta = timedelta(weeks=1) + self._inc = timedelta(weeks=1) self.kwds = kwds def isAnchored(self): @@ -416,7 +417,7 @@ def isAnchored(self): def apply(self, other): if self.weekday is None: - return other + self.n * self.delta + return other + self.n * self._inc if self.n > 0: k = self.n @@ -425,14 +426,14 @@ def apply(self, other): other = other + timedelta((self.weekday - otherDay) % 7) k = k - 1 for i in xrange(k): - other = other + self.delta + other = other + self._inc else: k = self.n otherDay = other.weekday() if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) for i in xrange(-k): - other = other - self.delta + other = other - self._inc return other def onOffset(self, dt): @@ -919,7 +920,6 @@ def rule_code(self): # Ticks class Tick(DateOffset): - _delta = None _inc = timedelta(microseconds=1000) def __add__(self, other): @@ -955,12 +955,9 @@ def __ne__(self, other): else: return DateOffset.__ne__(self, other) - @property + @cache_readonly def delta(self): - if self._delta is None: - self._delta = self.n * self._inc - - return self._delta + return self.n * self._inc @property def nanos(self): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e8f78eead6598..1868b56176af5 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1189,6 +1189,10 @@ def test_comparison(self): self.assert_(other > val) self.assert_(other >= val) + def test_delta_preserve_nanos(self): + val = Timestamp(1337299200000000123L) + result = val + timedelta(1) + self.assert_(result.nanosecond == val.nanosecond) """ From c3603915b1489ebb50a96f4a10ab4633f19bc893 Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Sun, 13 May 2012 00:17:31 +0300 Subject: [PATCH 102/114] Changes to plotting scatter matrix diagonals --- pandas/tools/plotting.py | 122 ++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 54 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index bc43e5454c9b3..36b31de9f8a51 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -3,6 +3,7 @@ from itertools import izip import numpy as np +from scipy import stats from pandas.util.decorators import cache_readonly import pandas.core.common as com @@ -12,7 +13,7 @@ from pandas.tseries.offsets import DateOffset def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, - **kwds): + diagonal='hist', **kwds): """ Draw a matrix of scatter plots. @@ -36,64 +37,77 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, for i, a in zip(range(n), df.columns): for j, b in zip(range(n), df.columns): - axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) - axes[i, j].set_xlabel('') - axes[i, j].set_ylabel('') - axes[i, j].set_xticklabels([]) - axes[i, j].set_yticklabels([]) - ticks = df.index - - is_datetype = ticks.inferred_type in ('datetime', 'date', + if i == j: + # Deal with the diagonal by drawing a histogram there. 
+ if diagonal == 'hist': + axes[i, j].hist(df[a]) + elif diagonal == 'kde': + y = df[a] + gkde = stats.gaussian_kde(y) + ind = np.linspace(min(y), max(y), 1000) + axes[i, j].plot(ind, gkde.evaluate(ind), **kwds) + axes[i, j].yaxis.set_visible(False) + axes[i, j].xaxis.set_visible(False) + if i == 0 and j == 0: + axes[i, j].yaxis.set_ticks_position('left') + axes[i, j].yaxis.set_label_position('left') + axes[i, j].yaxis.set_visible(True) + if i == n - 1 and j == n - 1: + axes[i, j].yaxis.set_ticks_position('right') + axes[i, j].yaxis.set_label_position('right') + axes[i, j].yaxis.set_visible(True) + else: + axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) + axes[i, j].set_xlabel('') + axes[i, j].set_ylabel('') + axes[i, j].set_xticklabels([]) + axes[i, j].set_yticklabels([]) + ticks = df.index + + is_datetype = ticks.inferred_type in ('datetime', 'date', 'datetime64') - if ticks.is_numeric() or is_datetype: - """ - Matplotlib supports numeric values or datetime objects as - xaxis values. Taking LBYL approach here, by the time - matplotlib raises exception when using non numeric/datetime - values for xaxis, several actions are already taken by plt. - """ - ticks = ticks._mpl_repr() - - # setup labels - if i == 0 and j % 2 == 1: - axes[i, j].set_xlabel(b, visible=True) - #axes[i, j].xaxis.set_visible(True) - axes[i, j].set_xlabel(b) - axes[i, j].set_xticklabels(ticks) - axes[i, j].xaxis.set_ticks_position('top') - axes[i, j].xaxis.set_label_position('top') - if i == n - 1 and j % 2 == 0: - axes[i, j].set_xlabel(b, visible=True) - #axes[i, j].xaxis.set_visible(True) - axes[i, j].set_xlabel(b) - axes[i, j].set_xticklabels(ticks) - axes[i, j].xaxis.set_ticks_position('bottom') - axes[i, j].xaxis.set_label_position('bottom') - if j == 0 and i % 2 == 0: - axes[i, j].set_ylabel(a, visible=True) - #axes[i, j].yaxis.set_visible(True) - axes[i, j].set_ylabel(a) - axes[i, j].set_yticklabels(ticks) - axes[i, j].yaxis.set_ticks_position('left') - axes[i, j].yaxis.set_label_position('left') - if j == n - 1 and i % 2 == 1: - axes[i, j].set_ylabel(a, visible=True) - #axes[i, j].yaxis.set_visible(True) - axes[i, j].set_ylabel(a) - axes[i, j].set_yticklabels(ticks) - axes[i, j].yaxis.set_ticks_position('right') - axes[i, j].yaxis.set_label_position('right') + if ticks.is_numeric() or is_datetype: + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. 
+ """ + ticks = ticks._mpl_repr() + + # setup labels + if i == 0 and j % 2 == 1: + axes[i, j].set_xlabel(b, visible=True) + #axes[i, j].xaxis.set_visible(True) + axes[i, j].set_xlabel(b) + axes[i, j].set_xticklabels(ticks) + axes[i, j].xaxis.set_ticks_position('top') + axes[i, j].xaxis.set_label_position('top') + if i == n - 1 and j % 2 == 0: + axes[i, j].set_xlabel(b, visible=True) + #axes[i, j].xaxis.set_visible(True) + axes[i, j].set_xlabel(b) + axes[i, j].set_xticklabels(ticks) + axes[i, j].xaxis.set_ticks_position('bottom') + axes[i, j].xaxis.set_label_position('bottom') + if j == 0 and i % 2 == 0: + axes[i, j].set_ylabel(a, visible=True) + #axes[i, j].yaxis.set_visible(True) + axes[i, j].set_ylabel(a) + axes[i, j].set_yticklabels(ticks) + axes[i, j].yaxis.set_ticks_position('left') + axes[i, j].yaxis.set_label_position('left') + if j == n - 1 and i % 2 == 1: + axes[i, j].set_ylabel(a, visible=True) + #axes[i, j].yaxis.set_visible(True) + axes[i, j].set_ylabel(a) + axes[i, j].set_yticklabels(ticks) + axes[i, j].yaxis.set_ticks_position('right') + axes[i, j].yaxis.set_label_position('right') axes[i, j].grid(b=grid) - # ensure {x,y}lim off diagonal are the same as diagonal - for i in range(n): - for j in range(n): - if i != j: - axes[i, j].set_xlim(axes[j, j].get_xlim()) - axes[i, j].set_ylim(axes[i, i].get_ylim()) - return axes def _gca(): From cf74512bf51e4f9c8607349b03f8d793c542e9de Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Mon, 14 May 2012 23:47:07 +0300 Subject: [PATCH 103/114] Changed xtick, ytick labels --- pandas/tools/plotting.py | 105 ++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 57 deletions(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 36b31de9f8a51..c172c031e23df 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -46,65 +46,56 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, gkde = stats.gaussian_kde(y) ind = np.linspace(min(y), max(y), 1000) axes[i, j].plot(ind, gkde.evaluate(ind), **kwds) - axes[i, j].yaxis.set_visible(False) - axes[i, j].xaxis.set_visible(False) - if i == 0 and j == 0: - axes[i, j].yaxis.set_ticks_position('left') - axes[i, j].yaxis.set_label_position('left') - axes[i, j].yaxis.set_visible(True) - if i == n - 1 and j == n - 1: - axes[i, j].yaxis.set_ticks_position('right') - axes[i, j].yaxis.set_label_position('right') - axes[i, j].yaxis.set_visible(True) else: axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) - axes[i, j].set_xlabel('') - axes[i, j].set_ylabel('') - axes[i, j].set_xticklabels([]) - axes[i, j].set_yticklabels([]) - ticks = df.index - - is_datetype = ticks.inferred_type in ('datetime', 'date', - 'datetime64') - - if ticks.is_numeric() or is_datetype: - """ - Matplotlib supports numeric values or datetime objects as - xaxis values. Taking LBYL approach here, by the time - matplotlib raises exception when using non numeric/datetime - values for xaxis, several actions are already taken by plt. 
- """ - ticks = ticks._mpl_repr() - - # setup labels - if i == 0 and j % 2 == 1: - axes[i, j].set_xlabel(b, visible=True) - #axes[i, j].xaxis.set_visible(True) - axes[i, j].set_xlabel(b) - axes[i, j].set_xticklabels(ticks) - axes[i, j].xaxis.set_ticks_position('top') - axes[i, j].xaxis.set_label_position('top') - if i == n - 1 and j % 2 == 0: - axes[i, j].set_xlabel(b, visible=True) - #axes[i, j].xaxis.set_visible(True) - axes[i, j].set_xlabel(b) - axes[i, j].set_xticklabels(ticks) - axes[i, j].xaxis.set_ticks_position('bottom') - axes[i, j].xaxis.set_label_position('bottom') - if j == 0 and i % 2 == 0: - axes[i, j].set_ylabel(a, visible=True) - #axes[i, j].yaxis.set_visible(True) - axes[i, j].set_ylabel(a) - axes[i, j].set_yticklabels(ticks) - axes[i, j].yaxis.set_ticks_position('left') - axes[i, j].yaxis.set_label_position('left') - if j == n - 1 and i % 2 == 1: - axes[i, j].set_ylabel(a, visible=True) - #axes[i, j].yaxis.set_visible(True) - axes[i, j].set_ylabel(a) - axes[i, j].set_yticklabels(ticks) - axes[i, j].yaxis.set_ticks_position('right') - axes[i, j].yaxis.set_label_position('right') + + axes[i, j].set_xlabel('') + axes[i, j].set_ylabel('') + axes[i, j].set_xticklabels([]) + axes[i, j].set_yticklabels([]) + ticks = df.index + + is_datetype = ticks.inferred_type in ('datetime', 'date', + 'datetime64') + + if ticks.is_numeric() or is_datetype: + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. + """ + ticks = ticks._mpl_repr() + + # setup labels + if i == 0 and j % 2 == 1: + axes[i, j].set_xlabel(b, visible=True) + #axes[i, j].xaxis.set_visible(True) + axes[i, j].set_xlabel(b) + axes[i, j].set_xticklabels(ticks) + axes[i, j].xaxis.set_ticks_position('top') + axes[i, j].xaxis.set_label_position('top') + if i == n - 1 and j % 2 == 0: + axes[i, j].set_xlabel(b, visible=True) + #axes[i, j].xaxis.set_visible(True) + axes[i, j].set_xlabel(b) + axes[i, j].set_xticklabels(ticks) + axes[i, j].xaxis.set_ticks_position('bottom') + axes[i, j].xaxis.set_label_position('bottom') + if j == 0 and i % 2 == 0: + axes[i, j].set_ylabel(a, visible=True) + #axes[i, j].yaxis.set_visible(True) + axes[i, j].set_ylabel(a) + axes[i, j].set_yticklabels(ticks) + axes[i, j].yaxis.set_ticks_position('left') + axes[i, j].yaxis.set_label_position('left') + if j == n - 1 and i % 2 == 1: + axes[i, j].set_ylabel(a, visible=True) + #axes[i, j].yaxis.set_visible(True) + axes[i, j].set_ylabel(a) + axes[i, j].set_yticklabels(ticks) + axes[i, j].yaxis.set_ticks_position('right') + axes[i, j].yaxis.set_label_position('right') axes[i, j].grid(b=grid) From d7d6a0fe1242bd03f9565b25d2542a22057916cd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Tue, 15 May 2012 00:12:52 +0300 Subject: [PATCH 104/114] Added simple test cases --- pandas/tests/test_graphics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 8e987f35d42e7..6fe1f93448671 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -214,6 +214,8 @@ def scat(**kwds): _check_plot_works(scat) _check_plot_works(scat, marker='+') _check_plot_works(scat, vmin=0) + _check_plot_works(scat, diagonal='kde') + _check_plot_works(scat, diagonal='hist') def scat2(x, y, by=None, ax=None, figsize=None): return plt.scatter_plot(df, x, y, by, ax, figsize=None) From 
cd8222c8b358aad2ac73ca38ba32bfe2e40f0d6d Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 16 May 2012 18:44:14 +0300 Subject: [PATCH 105/114] Updated plotting.py scatter_matrix docstring to describe all the parameters --- pandas/tools/plotting.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index c172c031e23df..11fc59add1eb9 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -19,6 +19,13 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, Parameters ---------- + alpha : amount of transparency applied + figsize : a tuple (width, height) in inches + ax : Matplotlib axis object + grid : setting this to True will show the grid + diagonal : pick between 'kde' and 'hist' for + either Kernel Density Estimation or Histogram + plon in the diagonal kwds : other plotting keyword arguments To be passed to scatter function @@ -48,7 +55,7 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, axes[i, j].plot(ind, gkde.evaluate(ind), **kwds) else: axes[i, j].scatter(df[b], df[a], alpha=alpha, **kwds) - + axes[i, j].set_xlabel('') axes[i, j].set_ylabel('') axes[i, j].set_xticklabels([]) From 8e2f3f91b03abd1a9734d34931c5725872455cbd Mon Sep 17 00:00:00 2001 From: Vytautas Jancauskas Date: Wed, 16 May 2012 19:12:16 +0300 Subject: [PATCH 106/114] Added scatter_matrix examples to visualization.rst --- doc/source/visualization.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index be969f3796935..6c035b816a9e9 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -241,5 +241,8 @@ Scatter plot matrix from pandas.tools.plotting import scatter_matrix df = DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd']) - @savefig scatter_matrix_ex.png width=6in - scatter_matrix(df, alpha=0.2, figsize=(8, 8)) + @savefig scatter_matrix_kde.png width=6in + scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='kde') + + @savefig scatter_matrix_hist.png width=6in + scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='hist') \ No newline at end of file From da1b234233497f1b76eff16514f1449c4c0a04ad Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 19 May 2012 13:11:35 -0400 Subject: [PATCH 107/114] DOC: release notes --- RELEASE.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.rst b/RELEASE.rst index 5b1327302cd7f..cc86e644a0f38 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -69,6 +69,7 @@ pandas 0.8.0 - Improved performance of join operations on integer keys (#682) - Can pass multiple columns to GroupBy object, e.g. 
grouped[[col1, col2]] to only aggregate a subset of the value columns (#383) + - Add histogram / kde plot options for scatter_matrix diagonals (#1237) **API Changes** From a6e32b80023f45cef4e60426dc624a882e235b2d Mon Sep 17 00:00:00 2001 From: Chang She Date: Fri, 11 May 2012 20:08:04 -0400 Subject: [PATCH 108/114] BUG: DataFrame.drop_duplicates with NA values --- pandas/src/groupby.pyx | 13 ++++++++++ pandas/tests/test_frame.py | 53 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/pandas/src/groupby.pyx b/pandas/src/groupby.pyx index a05e619636dd4..5b6afb86e172b 100644 --- a/pandas/src/groupby.pyx +++ b/pandas/src/groupby.pyx @@ -1306,6 +1306,7 @@ def duplicated(list values, take_last=False): cdef: Py_ssize_t i, n dict seen = {} + int has_nan = 0 object row n = len(values) @@ -1316,6 +1317,12 @@ def duplicated(list values, take_last=False): row = values[i] if row in seen: result[i] = 1 + elif row != row: + if has_nan == 1: + result[i] = 1 + else: + has_nan = 1 + result[i] = 0 else: seen[row] = None result[i] = 0 @@ -1324,6 +1331,12 @@ def duplicated(list values, take_last=False): row = values[i] if row in seen: result[i] = 1 + elif row != row: + if has_nan == 1: + result[i] = 1 + else: + has_nan = 1 + result[i] = 0 else: seen[row] = None result[i] = 0 diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5310a4b0d7532..6aa47dd9114bb 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3423,6 +3423,59 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['A', 'B'], take_last=True) assert_frame_equal(result, expected) + def test_drop_duplicates_NA(self): + # none + df = DataFrame({'A' : [None, None, 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B' : ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C' : [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], + 'D' : range(8)}) + + # single column + result = df.drop_duplicates('A') + expected = df.ix[[0, 2, 3]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('A', take_last=True) + expected = df.ix[[1, 6, 7]] + assert_frame_equal(result, expected) + + # multi column + result = df.drop_duplicates(['A', 'B']) + expected = df.ix[[0, 2, 3, 6]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(['A', 'B'], take_last=True) + expected = df.ix[[1, 5, 6, 7]] + assert_frame_equal(result, expected) + + # nan + df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B' : ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C' : [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], + 'D' : range(8)}) + + # single column + result = df.drop_duplicates('C') + expected = df[:2] + assert_frame_equal(result, expected) + + result = df.drop_duplicates('C', take_last=True) + expected = df.ix[[3, 7]] + assert_frame_equal(result, expected) + + # multi column + result = df.drop_duplicates(['C', 'B']) + expected = df.ix[[0, 1, 2, 4]] + assert_frame_equal(result, expected) + + result = df.drop_duplicates(['C', 'B'], take_last=True) + expected = df.ix[[1, 3, 6, 7]] + assert_frame_equal(result, expected) + def test_drop_col_still_multiindex(self): arrays = [[ 'a', 'b', 'c', 'top'], [ '', '', '', 'OD' ], From 2a6fc1110de088f3c535a00492a88d120f962601 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 16:33:40 -0400 Subject: [PATCH 109/114] use fast zip with a placeholder value just for np.nan --- pandas/core/frame.py | 26 +++++++++++++------ pandas/src/groupby.pyx | 34 
++++++++++++++++++++++--- pandas/src/tseries.pyx | 51 ++++++++++++++++++++++++++++++++++++++ pandas/tests/test_frame.py | 9 ++++--- 4 files changed, 104 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 333f91f94a67d..409d1a27af3b1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2431,7 +2431,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): new_labels = labels[mask] return self.reindex(**{axis_name: new_labels}) - def drop_duplicates(self, cols=None, take_last=False): + def drop_duplicates(self, cols=None, take_last=False, skipna=True): """ Return DataFrame with duplicate rows removed, optionally only considering certain columns @@ -2443,15 +2443,17 @@ def drop_duplicates(self, cols=None, take_last=False): default use all of the columns take_last : boolean, default False Take the last observed row in a row. Defaults to the first row + skipna : boolean, default True + If True then keep NaN Returns ------- deduplicated : DataFrame """ - duplicated = self.duplicated(cols, take_last=take_last) + duplicated = self.duplicated(cols, take_last=take_last, skipna=skipna) return self[-duplicated] - def duplicated(self, cols=None, take_last=False): + def duplicated(self, cols=None, take_last=False, skipna=True): """ Return boolean Series denoting duplicate rows, optionally only considering certain columns @@ -2463,20 +2465,29 @@ def duplicated(self, cols=None, take_last=False): default use all of the columns take_last : boolean, default False Take the last observed row in a row. Defaults to the first row + skipna : boolean, default True + If True then NaN are not marked as duplicates Returns ------- duplicated : Series """ + zip_func = lib.fast_zip if skipna else lib.fast_zip_fillna + if cols is not None: if isinstance(cols, list): - keys = zip(*[self[x] for x in cols]) + values = [self[x].values for x in cols] + keys = zip_func(values) + dup_func = lib.duplicated_skipna else: - keys = list(self[cols]) + keys = self[cols] + dup_func = lib.duplicated_skipna if skipna else lib.duplicated else: - keys = zip(*self.values.T) + values = list(self.values.T) + keys = zip_func(values) + dup_func = lib.duplicated_skipna - duplicated = lib.duplicated(keys, take_last=take_last) + duplicated = dup_func(list(keys), take_last=take_last) return Series(duplicated, index=self.index) #---------------------------------------------------------------------- @@ -4614,7 +4625,6 @@ def _homogenize(data, index, columns, dtype=None): def _put_str(s, space): return ('%s' % s)[:space].ljust(space) - def _is_sequence(x): try: iter(x) diff --git a/pandas/src/groupby.pyx b/pandas/src/groupby.pyx index 5b6afb86e172b..359412813f681 100644 --- a/pandas/src/groupby.pyx +++ b/pandas/src/groupby.pyx @@ -1301,12 +1301,39 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts +def duplicated_skipna(list values, take_last=False): + cdef: + Py_ssize_t i, n + dict seen = {} + object row + + n = len(values) + cdef ndarray[uint8_t] result = np.zeros(n, dtype=np.uint8) + + if take_last: + for i from n > i >= 0: + row = values[i] + if row in seen: + result[i] = 1 + else: + seen[row] = None + result[i] = 0 + else: + for i from 0 <= i < n: + row = values[i] + if row in seen: + result[i] = 1 + else: + seen[row] = None + result[i] = 0 + + return result.view(np.bool_) def duplicated(list values, take_last=False): cdef: Py_ssize_t i, n dict seen = {} - int has_nan = 0 + bint has_nan = 0 object row n = len(values) @@ -1318,7 +1345,7 @@ def 
duplicated(list values, take_last=False): if row in seen: result[i] = 1 elif row != row: - if has_nan == 1: + if has_nan: result[i] = 1 else: has_nan = 1 @@ -1332,7 +1359,7 @@ def duplicated(list values, take_last=False): if row in seen: result[i] = 1 elif row != row: - if has_nan == 1: + if has_nan: result[i] = 1 else: has_nan = 1 @@ -1343,7 +1370,6 @@ def duplicated(list values, take_last=False): return result.view(np.bool_) - def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): cdef: Py_ssize_t i, group_size, n, lab, start diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 18bdd8f6644da..f90edf7aa5966 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -404,6 +404,57 @@ def fast_zip(list ndarrays): return result +cdef class _PandasNull: + pass + +pandas_null = _PandasNull() + +def fast_zip_fillna(list ndarrays, fill_value=pandas_null): + ''' + For zipping multiple ndarrays into an ndarray of tuples + ''' + cdef: + Py_ssize_t i, j, k, n + ndarray[object] result + flatiter it + object val, tup + + k = len(ndarrays) + n = len(ndarrays[0]) + + result = np.empty(n, dtype=object) + + # initialize tuples on first pass + arr = ndarrays[0] + it = PyArray_IterNew(arr) + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + tup = PyTuple_New(k) + + if val != val: + val = fill_value + + PyTuple_SET_ITEM(tup, 0, val) + Py_INCREF(val) + result[i] = tup + PyArray_ITER_NEXT(it) + + for j in range(1, k): + arr = ndarrays[j] + it = PyArray_IterNew(arr) + if len(arr) != n: + raise ValueError('all arrays must be same length') + + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + if val != val: + val = fill_value + + PyTuple_SET_ITEM(result[i], j, val) + Py_INCREF(val) + PyArray_ITER_NEXT(it) + + return result def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): cdef: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 6aa47dd9114bb..866880a1a3f32 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1,3 +1,4 @@ + # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta @@ -3459,20 +3460,20 @@ def test_drop_duplicates_NA(self): 'D' : range(8)}) # single column - result = df.drop_duplicates('C') + result = df.drop_duplicates('C', skipna=False) expected = df[:2] assert_frame_equal(result, expected) - result = df.drop_duplicates('C', take_last=True) + result = df.drop_duplicates('C', take_last=True, skipna=False) expected = df.ix[[3, 7]] assert_frame_equal(result, expected) # multi column - result = df.drop_duplicates(['C', 'B']) + result = df.drop_duplicates(['C', 'B'], skipna=False) expected = df.ix[[0, 1, 2, 4]] assert_frame_equal(result, expected) - result = df.drop_duplicates(['C', 'B'], take_last=True) + result = df.drop_duplicates(['C', 'B'], take_last=True, skipna=False) expected = df.ix[[1, 3, 6, 7]] assert_frame_equal(result, expected) From d95a25469ab8f3bd088e3a3b0352b569274a58bd Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 16:41:28 -0400 Subject: [PATCH 110/114] TST: vbench for drop_duplicate with skipna set to False --- vb_suite/reindex.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py index 9c307131ae5ac..e20784b1cf8df 100644 --- a/vb_suite/reindex.py +++ b/vb_suite/reindex.py @@ -135,6 +135,11 @@ def backfill(): name='frame_drop_duplicates', start_date=datetime(2011, 11, 15)) +statement2 = "df.drop_duplicates(['key1', 'key2'], 
skipna=False)" +frame_drop_duplicates_na = Benchmark(statement, setup, + name='frame_drop_duplicates', + start_date=datetime(2012, 5, 15)) + #---------------------------------------------------------------------- # fillna, many columns From 7953ae85dac449e66f2742546a9df0dd53f5593c Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 15 May 2012 18:31:45 -0400 Subject: [PATCH 111/114] optimized a little bit for speed --- pandas/core/frame.py | 21 +++----- pandas/src/groupby.pyx | 96 ++++++++++++++++++++++-------------- pandas/src/tseries.pyx | 52 ------------------- pandas/tests/test_frame.py | 8 +-- pandas/tests/test_tseries.py | 6 ++- vb_suite/reindex.py | 18 +++++-- 6 files changed, 89 insertions(+), 112 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 409d1a27af3b1..44812853c278f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2431,7 +2431,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): new_labels = labels[mask] return self.reindex(**{axis_name: new_labels}) - def drop_duplicates(self, cols=None, take_last=False, skipna=True): + def drop_duplicates(self, cols=None, take_last=False): """ Return DataFrame with duplicate rows removed, optionally only considering certain columns @@ -2450,10 +2450,10 @@ def drop_duplicates(self, cols=None, take_last=False, skipna=True): ------- deduplicated : DataFrame """ - duplicated = self.duplicated(cols, take_last=take_last, skipna=skipna) + duplicated = self.duplicated(cols, take_last=take_last) return self[-duplicated] - def duplicated(self, cols=None, take_last=False, skipna=True): + def duplicated(self, cols=None, take_last=False): """ Return boolean Series denoting duplicate rows, optionally only considering certain columns @@ -2465,29 +2465,22 @@ def duplicated(self, cols=None, take_last=False, skipna=True): default use all of the columns take_last : boolean, default False Take the last observed row in a row. 
Defaults to the first row - skipna : boolean, default True - If True then NaN are not marked as duplicates Returns ------- duplicated : Series """ - zip_func = lib.fast_zip if skipna else lib.fast_zip_fillna - if cols is not None: if isinstance(cols, list): values = [self[x].values for x in cols] - keys = zip_func(values) - dup_func = lib.duplicated_skipna + keys = lib.fast_zip_fillna(values) else: - keys = self[cols] - dup_func = lib.duplicated_skipna if skipna else lib.duplicated + keys = lib.fast_zip_fillna([self[cols]]) else: values = list(self.values.T) - keys = zip_func(values) - dup_func = lib.duplicated_skipna + keys = lib.fast_zip_fillna(values) - duplicated = dup_func(list(keys), take_last=take_last) + duplicated = lib.duplicated(keys, take_last=take_last) return Series(duplicated, index=self.index) #---------------------------------------------------------------------- diff --git a/pandas/src/groupby.pyx b/pandas/src/groupby.pyx index 359412813f681..78c3b0ff3f11a 100644 --- a/pandas/src/groupby.pyx +++ b/pandas/src/groupby.pyx @@ -1301,39 +1301,72 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts -def duplicated_skipna(list values, take_last=False): +cdef class _PandasNull: + + def __richcmp__(_PandasNull self, object other, int op): + if op == 2: # == + return isinstance(other, _PandasNull) + elif op == 3: # != + return not isinstance(other, _PandasNull) + else: + return False + + def __hash__(self): + return 0 + +pandas_null = _PandasNull() + +def fast_zip_fillna(list ndarrays, fill_value=pandas_null): + ''' + For zipping multiple ndarrays into an ndarray of tuples + ''' cdef: - Py_ssize_t i, n - dict seen = {} - object row + Py_ssize_t i, j, k, n + ndarray[object] result + flatiter it + object val, tup - n = len(values) - cdef ndarray[uint8_t] result = np.zeros(n, dtype=np.uint8) + k = len(ndarrays) + n = len(ndarrays[0]) - if take_last: - for i from n > i >= 0: - row = values[i] - if row in seen: - result[i] = 1 - else: - seen[row] = None - result[i] = 0 - else: - for i from 0 <= i < n: - row = values[i] - if row in seen: - result[i] = 1 - else: - seen[row] = None - result[i] = 0 + result = np.empty(n, dtype=object) - return result.view(np.bool_) + # initialize tuples on first pass + arr = ndarrays[0] + it = PyArray_IterNew(arr) + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + tup = PyTuple_New(k) + + if val != val: + val = fill_value -def duplicated(list values, take_last=False): + PyTuple_SET_ITEM(tup, 0, val) + Py_INCREF(val) + result[i] = tup + PyArray_ITER_NEXT(it) + + for j in range(1, k): + arr = ndarrays[j] + it = PyArray_IterNew(arr) + if len(arr) != n: + raise ValueError('all arrays must be same length') + + for i in range(n): + val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) + if val != val: + val = fill_value + + PyTuple_SET_ITEM(result[i], j, val) + Py_INCREF(val) + PyArray_ITER_NEXT(it) + + return result + +def duplicated(ndarray[object] values, take_last=False): cdef: Py_ssize_t i, n dict seen = {} - bint has_nan = 0 object row n = len(values) @@ -1342,14 +1375,9 @@ def duplicated(list values, take_last=False): if take_last: for i from n > i >= 0: row = values[i] + if row in seen: result[i] = 1 - elif row != row: - if has_nan: - result[i] = 1 - else: - has_nan = 1 - result[i] = 0 else: seen[row] = None result[i] = 0 @@ -1358,12 +1386,6 @@ def duplicated(list values, take_last=False): row = values[i] if row in seen: result[i] = 1 - elif row != row: - if has_nan: - result[i] = 1 - else: - has_nan = 1 - 
result[i] = 0 else: seen[row] = None result[i] = 0 diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index f90edf7aa5966..8db04bc6396ad 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -404,58 +404,6 @@ def fast_zip(list ndarrays): return result -cdef class _PandasNull: - pass - -pandas_null = _PandasNull() - -def fast_zip_fillna(list ndarrays, fill_value=pandas_null): - ''' - For zipping multiple ndarrays into an ndarray of tuples - ''' - cdef: - Py_ssize_t i, j, k, n - ndarray[object] result - flatiter it - object val, tup - - k = len(ndarrays) - n = len(ndarrays[0]) - - result = np.empty(n, dtype=object) - - # initialize tuples on first pass - arr = ndarrays[0] - it = PyArray_IterNew(arr) - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - tup = PyTuple_New(k) - - if val != val: - val = fill_value - - PyTuple_SET_ITEM(tup, 0, val) - Py_INCREF(val) - result[i] = tup - PyArray_ITER_NEXT(it) - - for j in range(1, k): - arr = ndarrays[j] - it = PyArray_IterNew(arr) - if len(arr) != n: - raise ValueError('all arrays must be same length') - - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - if val != val: - val = fill_value - - PyTuple_SET_ITEM(result[i], j, val) - Py_INCREF(val) - PyArray_ITER_NEXT(it) - - return result - def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): cdef: Py_ssize_t i, n = len(indexer) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 866880a1a3f32..f1b2538cc19b4 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3460,20 +3460,20 @@ def test_drop_duplicates_NA(self): 'D' : range(8)}) # single column - result = df.drop_duplicates('C', skipna=False) + result = df.drop_duplicates('C') expected = df[:2] assert_frame_equal(result, expected) - result = df.drop_duplicates('C', take_last=True, skipna=False) + result = df.drop_duplicates('C', take_last=True) expected = df.ix[[3, 7]] assert_frame_equal(result, expected) # multi column - result = df.drop_duplicates(['C', 'B'], skipna=False) + result = df.drop_duplicates(['C', 'B']) expected = df.ix[[0, 1, 2, 4]] assert_frame_equal(result, expected) - result = df.drop_duplicates(['C', 'B'], take_last=True, skipna=False) + result = df.drop_duplicates(['C', 'B'], take_last=True) expected = df.ix[[1, 3, 6, 7]] assert_frame_equal(result, expected) diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 57f154384bf91..a29f44127a173 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -170,7 +170,7 @@ def test_ensure_platform_int(): assert(result is arr) def test_duplicated_with_nas(): - keys = [0, 1, nan, 0, 2, nan] + keys = np.array([0, 1, nan, 0, 2, nan], dtype=object) result = lib.duplicated(keys) expected = [False, False, False, True, False, True] @@ -180,7 +180,9 @@ def test_duplicated_with_nas(): expected = [True, False, True, False, False, False] assert(np.array_equal(result, expected)) - keys = [(0, 0), (0, nan), (nan, 0), (nan, nan)] * 2 + keys = np.empty(8, dtype=object) + for i, t in enumerate(zip([0, 0, nan, nan]*2, [0, nan, 0, nan]*2)): + keys[i] = t result = lib.duplicated(keys) falses = [False] * 4 diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py index e20784b1cf8df..24109e0559b4a 100644 --- a/vb_suite/reindex.py +++ b/vb_suite/reindex.py @@ -114,6 +114,7 @@ def backfill(): # pathological, but realistic setup = common_setup + """ +import pandas._tseries as lib N = 10000 K = 10 @@ -135,11 +136,22 @@ def backfill(): 
name='frame_drop_duplicates', start_date=datetime(2011, 11, 15)) -statement2 = "df.drop_duplicates(['key1', 'key2'], skipna=False)" -frame_drop_duplicates_na = Benchmark(statement, setup, - name='frame_drop_duplicates', +lib_fast_zip = Benchmark('lib.fast_zip(df.values.T)', setup, + name='lib_fast_zip', + start_date=datetime(2012, 1, 1)) + +setup = setup + """ +df.ix[:10000, :] = np.nan +""" +statement2 = "df.drop_duplicates(['key1', 'key2'])" +frame_drop_duplicates_na = Benchmark(statement2, setup, + name='frame_drop_duplicates_na', start_date=datetime(2012, 5, 15)) +lib_fast_zip_fillna = Benchmark('lib.fast_zip_fillna(df.values.T)', setup, + name='lib_fast_zip_fillna', + start_date=datetime(2012, 5, 15)) + #---------------------------------------------------------------------- # fillna, many columns From 916be1d382094f17b0dfa8f350b4eb9b5294960e Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 16 May 2012 16:52:05 -0400 Subject: [PATCH 112/114] ENH: inplace option to DataFrame.drop_duplicates #805 with vbench --- pandas/core/frame.py | 14 +++++++++-- pandas/tests/test_frame.py | 50 ++++++++++++++++++++++++++++++++++++++ vb_suite/reindex.py | 10 ++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 44812853c278f..dc48baec85a00 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2431,7 +2431,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): new_labels = labels[mask] return self.reindex(**{axis_name: new_labels}) - def drop_duplicates(self, cols=None, take_last=False): + def drop_duplicates(self, cols=None, take_last=False, inplace=False): """ Return DataFrame with duplicate rows removed, optionally only considering certain columns @@ -2445,13 +2445,23 @@ def drop_duplicates(self, cols=None, take_last=False): Take the last observed row in a row. 
Defaults to the first row skipna : boolean, default True If True then keep NaN + inplace : boolean, default False + Whether to drop duplicates in place or to return a copy Returns ------- deduplicated : DataFrame """ + duplicated = self.duplicated(cols, take_last=take_last) - return self[-duplicated] + + if inplace: + inds, = (-duplicated).nonzero() + self._data = self._data.take(inds) + self._clear_item_cache() + return self + else: + return self[-duplicated] def duplicated(self, cols=None, take_last=False): """ diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f1b2538cc19b4..209dbdc73f7fd 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3477,6 +3477,56 @@ def test_drop_duplicates_NA(self): expected = df.ix[[1, 3, 6, 7]] assert_frame_equal(result, expected) + def test_drop_duplicates_inplace(self): + orig = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B' : ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C' : [1, 1, 2, 2, 2, 2, 1, 2], + 'D' : range(8)}) + + # single column + df = orig.copy() + df.drop_duplicates('A', inplace=True) + expected = orig[:2] + result = df + assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates('A', take_last=True, inplace=True) + expected = orig.ix[[6, 7]] + result = df + assert_frame_equal(result, expected) + + # multi column + df = orig.copy() + df.drop_duplicates(['A', 'B'], inplace=True) + expected = orig.ix[[0, 1, 2, 3]] + result = df + assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates(['A', 'B'], take_last=True, inplace=True) + expected = orig.ix[[0, 5, 6, 7]] + result = df + assert_frame_equal(result, expected) + + # consider everything + orig2 = orig.ix[:, ['A', 'B', 'C']].copy() + + df2 = orig2.copy() + df2.drop_duplicates(inplace=True) + # in this case only + expected = orig2.drop_duplicates(['A', 'B']) + result = df2 + assert_frame_equal(result, expected) + + df2 = orig2.copy() + df2.drop_duplicates(take_last=True, inplace=True) + expected = orig2.drop_duplicates(['A', 'B'], take_last=True) + result = df2 + assert_frame_equal(result, expected) + def test_drop_col_still_multiindex(self): arrays = [[ 'a', 'b', 'c', 'top'], [ '', '', '', 'OD' ], diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py index 24109e0559b4a..62b26724eff46 100644 --- a/vb_suite/reindex.py +++ b/vb_suite/reindex.py @@ -136,6 +136,11 @@ def backfill(): name='frame_drop_duplicates', start_date=datetime(2011, 11, 15)) +statement = "df.drop_duplicates(['key1', 'key2'], inplace=True)" +frame_drop_dup_inplace = Benchmark(statement, setup, + name='frame_drop_dup_inplace', + start_date=datetime(2012, 5, 16)) + lib_fast_zip = Benchmark('lib.fast_zip(df.values.T)', setup, name='lib_fast_zip', start_date=datetime(2012, 1, 1)) @@ -152,6 +157,11 @@ def backfill(): name='lib_fast_zip_fillna', start_date=datetime(2012, 5, 15)) +statement2 = "df.drop_duplicates(['key1', 'key2'], inplace=True)" +frame_drop_dup_na_inplace = Benchmark(statement2, setup, + name='frame_drop_dup_na_inplace', + start_date=datetime(2012, 5, 16)) + #---------------------------------------------------------------------- # fillna, many columns From ba6a9c81f43740b820216c85aab8814953482266 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Wed, 16 May 2012 23:25:10 +0200 Subject: [PATCH 113/114] BUG: replace complex64 with complex128 As mentioned in #1098. 
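A minimal sketch of the precision problem this fixes (not part of the patch; it only assumes numpy): complex64 stores each component as float32, so object columns coerced to that dtype silently lose double precision, while complex128 ('c16') matches Python's built-in complex exactly.

    import numpy as np

    val = 1.0 + 1e-9j                 # imaginary part needs double precision
    lo = np.complex64(val)            # float32 components, ~7 significant digits
    hi = np.complex128(val)           # float64 components, full precision
    print(lo.imag == 1e-9)            # False: truncated on the way in
    print(hi.imag == 1e-9)            # True: round-trips exactly

Inferring 'c16' in maybe_convert_objects and building complex128 blocks keeps DataFrame round-trips lossless for Python complex values.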
--- pandas/core/internals.py | 4 ++-- pandas/src/inference.pyx | 10 +++++----- pandas/tests/test_frame.py | 2 +- pandas/tests/test_internals.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7e8e67274a0a4..c4e4d810f4e0c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1116,7 +1116,7 @@ def form_blocks(data, axes): blocks.append(float_block) if len(complex_dict): - complex_block = _simple_blockify(complex_dict, items, np.complex64) + complex_block = _simple_blockify(complex_dict, items, np.complex128) blocks.append(complex_block) if len(int_dict): @@ -1222,7 +1222,7 @@ def _interleaved_dtype(blocks): elif have_dt64 and not have_float and not have_complex: return np.datetime64 elif have_complex: - return np.complex64 + return np.complex128 else: return np.float64 diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 63e6776abaa22..87fbb7076880e 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -11,7 +11,7 @@ _TYPE_MAP = { np.uint64: 'integer', np.float32: 'floating', np.float64: 'floating', - np.complex64: 'complex', + np.complex128: 'complex', np.complex128: 'complex', np.string_: 'string', np.unicode_: 'unicode', @@ -223,7 +223,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): cdef: Py_ssize_t i, n ndarray[float64_t] floats - ndarray[complex64_t] complexes + ndarray[complex128_t] complexes ndarray[int64_t] ints bint seen_float = 0 bint seen_complex = 0 @@ -233,7 +233,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): n = len(values) floats = np.empty(n, dtype='f8') - complexes = np.empty(n, dtype='c8') + complexes = np.empty(n, dtype='c16') ints = np.empty(n, dtype='i8') for i from 0 <= i < n: @@ -278,7 +278,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, cdef: Py_ssize_t i, n ndarray[float64_t] floats - ndarray[complex64_t] complexes + ndarray[complex128_t] complexes ndarray[int64_t] ints ndarray[uint8_t] bools bint seen_float = 0 @@ -293,7 +293,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, n = len(objects) floats = np.empty(n, dtype='f8') - complexes = np.empty(n, dtype='c8') + complexes = np.empty(n, dtype='c16') ints = np.empty(n, dtype='i8') bools = np.empty(n, dtype=np.uint8) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 209dbdc73f7fd..4cda34cbc89ee 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1693,7 +1693,7 @@ def test_constructor_scalar_inference(self): self.assert_(df['int'].dtype == np.int64) self.assert_(df['bool'].dtype == np.bool_) self.assert_(df['float'].dtype == np.float64) - self.assert_(df['complex'].dtype == np.complex64) + self.assert_(df['complex'].dtype == np.complex128) self.assert_(df['object'].dtype == np.object_) def test_constructor_DataFrame(self): diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 976b4439fffdf..cf1ce851a6bfb 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -27,7 +27,7 @@ def get_float_ex(cols=['a', 'c', 'e']): return make_block(floats, cols, TEST_COLS) def get_complex_ex(cols=['h']): - complexes = (get_float_mat(N, 1).T * 1j).astype(np.complex64) + complexes = (get_float_mat(N, 1).T * 1j).astype(np.complex128) return make_block(complexes, cols, TEST_COLS) def get_obj_ex(cols=['b', 'd']): @@ -211,7 +211,7 @@ def test_block_id_vector_item_dtypes(self): result = self.mgr.item_dtypes 
expected = ['float64', 'object', 'float64', 'object', 'float64', - 'bool', 'int64', 'complex64'] + 'bool', 'int64', 'complex128'] self.assert_(np.array_equal(result, expected)) def test_union_block_items(self): From 1cacb6cac3bc673f3aba1e111f56bc3dbef49950 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 19 May 2012 13:21:15 -0400 Subject: [PATCH 114/114] ENH: add KDE plot from #1059 --- RELEASE.rst | 3 +++ pandas/tools/plotting.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/RELEASE.rst b/RELEASE.rst index cc86e644a0f38..515d9bab794ec 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -53,6 +53,7 @@ pandas 0.8.0 - Add keys() method to DataFrame - Add flexible replace method for replacing potentially values to Series and DataFrame (#929, #1241) + - Add 'kde' plot kind for Series/DataFrame.plot (#1059) **Improvements to existing features** @@ -70,6 +71,7 @@ pandas 0.8.0 - Can pass multiple columns to GroupBy object, e.g. grouped[[col1, col2]] to only aggregate a subset of the value columns (#383) - Add histogram / kde plot options for scatter_matrix diagonals (#1237) + - Add inplace option to DataFrame.drop_duplicates (#805) **API Changes** @@ -101,6 +103,7 @@ pandas 0.8.0 - Handle Excel 2003 #N/A as NaN from xlrd (#1213, #1225) - Fix timestamp locale-related deserialization issues with HDFStore by moving to datetime64 representation (#1081, #809) + - Fix DataFrame.duplicated/drop_duplicates NA value handling (#557) pandas 0.7.3 ============ diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 11fc59add1eb9..9fd3e5d173bf9 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -338,6 +338,38 @@ def _get_xticks(self): return x +class KdePlot(MPLPlot): + def __init__(self, data, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + + def _get_plot_function(self): + return self.plt.Axes.plot + + def _make_plot(self): + plotf = self._get_plot_function() + for i, (label, y) in enumerate(self._iter_data()): + if self.subplots: + ax = self.axes[i] + style = 'k' + else: + style = '' # empty string ignored + ax = self.ax + if self.style: + style = self.style + gkde = stats.gaussian_kde(y) + sample_range = max(y) - min(y) + ind = np.linspace(min(y) - 0.5 * sample_range, + max(y) + 0.5 * sample_range, 1000) + ax.set_ylabel("Density") + plotf(ax, ind, gkde.evaluate(ind), style, label=label, **self.kwds) + ax.grid(self.grid) + + def _post_plot_logic(self): + df = self.data + + if self.subplots and self.legend: + self.axes[0].legend(loc='best') + class LinePlot(MPLPlot): def __init__(self, data, **kwargs): @@ -682,6 +714,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, klass = LinePlot elif kind in ('bar', 'barh'): klass = BarPlot + elif kind == 'kde': + klass = KdePlot if ax is None: ax = _gca()
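A short usage sketch of the plotting features added in this series, assuming matplotlib and scipy are installed (both KDE paths call scipy.stats.gaussian_kde); the names and data below are illustrative only:

    import numpy as np
    import matplotlib.pyplot as plt
    from pandas import DataFrame, Series
    from pandas.tools.plotting import scatter_matrix

    s = Series(np.random.randn(1000))
    s.plot(kind='kde')                               # new 'kde' plot kind

    df = DataFrame(np.random.randn(500, 3), columns=['a', 'b', 'c'])
    scatter_matrix(df, alpha=0.2, diagonal='kde')    # kde on the diagonal panels
    plt.show()

The scatter_matrix diagonal also accepts 'hist' (the default), which draws a histogram of each column instead of a density estimate.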