Skip to content

Commit 8942f76

Browse files
committed
Merge remote-tracking branch 'upstream/master' into nchmura4-nchmura-df-style-hide
2 parents 709ab50 + e2a0251 commit 8942f76

File tree

100 files changed

+3194
-1939
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+3194
-1939
lines changed

appveyor.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ install:
5959

6060
# install our build environment
6161
- cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false
62-
- cmd: conda update -q conda
62+
# - cmd: conda update -q conda
6363
- cmd: conda config --set ssl_verify false
6464

6565
# add the pandas channel *before* defaults to have defaults take priority

asv_bench/benchmarks/io_bench.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from .pandas_vb_common import *
23
from pandas import concat, Timestamp, compat
34
try:
@@ -192,3 +193,32 @@ def time_read_nrows(self, compression, engine):
192193
ext = ".bz2"
193194
pd.read_csv(self.big_fname + ext, nrows=10,
194195
compression=compression, engine=engine)
196+
197+
198+
class read_json_lines(object):
199+
goal_time = 0.2
200+
fname = "__test__.json"
201+
202+
def setup(self):
203+
self.N = 100000
204+
self.C = 5
205+
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]))
206+
self.df.to_json(self.fname,orient="records",lines=True)
207+
208+
def teardown(self):
209+
try:
210+
os.remove(self.fname)
211+
except:
212+
pass
213+
214+
def time_read_json_lines(self):
215+
pd.read_json(self.fname, lines=True)
216+
217+
def time_read_json_lines_chunk(self):
218+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))
219+
220+
def peakmem_read_json_lines(self):
221+
pd.read_json(self.fname, lines=True)
222+
223+
def peakmem_read_json_lines_chunk(self):
224+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))

asv_bench/benchmarks/sparse.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from .pandas_vb_common import *
44
import scipy.sparse
5-
from pandas import SparseSeries, SparseDataFrame
5+
from pandas import SparseSeries, SparseDataFrame, SparseArray
66

77

88
class sparse_series_to_frame(object):
@@ -23,6 +23,69 @@ def time_sparse_series_to_frame(self):
2323
SparseDataFrame(self.series)
2424

2525

26+
class sparse_array_constructor(object):
27+
goal_time = 0.2
28+
29+
def setup(self):
30+
np.random.seed(1)
31+
self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64)
32+
self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64)
33+
34+
self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
35+
self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
36+
37+
self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan)
38+
self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan)
39+
40+
self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
41+
self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
42+
43+
def make_numeric_array(self, length, dense_size, fill_value, dtype):
44+
arr = np.array([fill_value] * length, dtype=dtype)
45+
indexer = np.unique(np.random.randint(0, length, dense_size))
46+
arr[indexer] = np.random.randint(0, 100, len(indexer))
47+
return (arr, fill_value, dtype)
48+
49+
def make_object_array(self, length, dense_size, fill_value):
50+
elems = np.array(['a', 0.0, False, 1, 2], dtype=np.object)
51+
arr = np.array([fill_value] * length, dtype=np.object)
52+
indexer = np.unique(np.random.randint(0, length, dense_size))
53+
arr[indexer] = np.random.choice(elems, len(indexer))
54+
return (arr, fill_value, np.object)
55+
56+
def time_sparse_array_constructor_int64_10percent(self):
57+
arr, fill_value, dtype = self.int64_10percent
58+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
59+
60+
def time_sparse_array_constructor_int64_1percent(self):
61+
arr, fill_value, dtype = self.int64_1percent
62+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
63+
64+
def time_sparse_array_constructor_float64_10percent(self):
65+
arr, fill_value, dtype = self.float64_10percent
66+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
67+
68+
def time_sparse_array_constructor_float64_1percent(self):
69+
arr, fill_value, dtype = self.float64_1percent
70+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
71+
72+
def time_sparse_array_constructor_object_nan_fill_value_10percent(self):
73+
arr, fill_value, dtype = self.object_nan_fill_value_10percent
74+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
75+
76+
def time_sparse_array_constructor_object_nan_fill_value_1percent(self):
77+
arr, fill_value, dtype = self.object_nan_fill_value_1percent
78+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
79+
80+
def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self):
81+
arr, fill_value, dtype = self.object_non_nan_fill_value_10percent
82+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
83+
84+
def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self):
85+
arr, fill_value, dtype = self.object_non_nan_fill_value_1percent
86+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
87+
88+
2689
class sparse_frame_constructor(object):
2790
goal_time = 0.2
2891

ci/install.ps1

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ $MINICONDA_URL = "http://repo.continuum.io/miniconda/"
77

88
function DownloadMiniconda ($python_version, $platform_suffix) {
99
$webclient = New-Object System.Net.WebClient
10-
$filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe"
10+
$filename = "Miniconda3-4.3.21-Windows-" + $platform_suffix + ".exe"
1111
$url = $MINICONDA_URL + $filename
1212

1313
$basedir = $pwd.Path + "\"
@@ -85,7 +85,7 @@ function UpdateConda ($python_home) {
8585

8686
function main () {
8787
InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT
88-
UpdateConda $env:CONDA_ROOT
88+
# UpdateConda $env:CONDA_ROOT
8989
InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client"
9090
}
9191

ci/install_circle.sh

+4-2
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,17 @@ echo "[Using clean Miniconda install]"
1010
rm -rf "$MINICONDA_DIR"
1111

1212
# install miniconda
13-
wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1
13+
# wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1
14+
# Pin miniconda
15+
wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-Linux-x86_64.sh -q -O miniconda.sh || exit 1
1416
bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1
1517

1618
export PATH="$MINICONDA_DIR/bin:$PATH"
1719

1820
echo "[update conda]"
1921
conda config --set ssl_verify false || exit 1
2022
conda config --set always_yes true --set changeps1 false || exit 1
21-
conda update -q conda
23+
# conda update -q conda
2224

2325
# add the pandas channel to take priority
2426
# to add extra packages

ci/install_travis.sh

+7-3
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,13 @@ fi
3434

3535
# install miniconda
3636
if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
37-
time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1
37+
# temporarily pin miniconda
38+
# time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1
39+
time wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-MacOSX-x86_64.sh -O miniconda.sh || exit 1
3840
else
39-
time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
41+
# temporarily pin miniconda
42+
# time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
43+
time wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-Linux-x86_64.sh -O miniconda.sh || exit 1
4044
fi
4145
time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1
4246

@@ -48,7 +52,7 @@ echo
4852
echo "[update conda]"
4953
conda config --set ssl_verify false || exit 1
5054
conda config --set quiet true --set always_yes true --set changeps1 false || exit 1
51-
conda update -q conda
55+
# conda update -q conda
5256

5357
echo
5458
echo "[add channels]"

ci/requirements-2.7_SLOW.run

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ s3fs
1616
psycopg2
1717
pymysql
1818
html5lib
19-
beautiful-soup
19+
beautifulsoup4

ci/requirements-2.7_WIN.run

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ xlsxwriter
1414
s3fs
1515
bottleneck
1616
html5lib
17-
beautiful-soup
17+
beautifulsoup4
1818
jinja2=2.8

ci/requirements_all.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ xlrd
1313
xlwt
1414
html5lib
1515
patsy
16-
beautiful-soup
16+
beautifulsoup4
1717
numpy
1818
cython
1919
scipy

doc/source/10min.rst

+2-12
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
np.random.seed(123456)
1212
np.set_printoptions(precision=4, suppress=True)
1313
import matplotlib
14-
matplotlib.style.use('ggplot')
14+
# matplotlib.style.use('default')
1515
pd.options.display.max_rows = 15
1616
1717
#### portions of this were borrowed from the
@@ -95,17 +95,7 @@ will be completed:
9595
df2.append df2.combine_first
9696
df2.apply df2.compound
9797
df2.applymap df2.consolidate
98-
df2.as_blocks df2.convert_objects
99-
df2.asfreq df2.copy
100-
df2.as_matrix df2.corr
101-
df2.astype df2.corrwith
102-
df2.at df2.count
103-
df2.at_time df2.cov
104-
df2.axes df2.cummax
105-
df2.B df2.cummin
106-
df2.between_time df2.cumprod
107-
df2.bfill df2.cumsum
108-
df2.blocks df2.D
98+
df2.D
10999

110100
As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically
111101
tab completed. ``E`` is there as well; the rest of the attributes have been

doc/source/advanced.rst

+23
Original file line numberDiff line numberDiff line change
@@ -833,12 +833,21 @@ Of course if you need integer based selection, then use ``iloc``
833833
IntervalIndex
834834
~~~~~~~~~~~~~
835835
836+
:class:`IntervalIndex` together with its own dtype, ``interval`` as well as the
837+
:class:`Interval` scalar type, allow first-class support in pandas for interval
838+
notation.
839+
840+
The ``IntervalIndex`` allows some unique indexing and is also used as a
841+
return type for the categories in :func:`cut` and :func:`qcut`.
842+
836843
.. versionadded:: 0.20.0
837844
838845
.. warning::
839846
840847
These indexing behaviors are provisional and may change in a future version of pandas.
841848
849+
An ``IntervalIndex`` can be used in ``Series`` and in ``DataFrame`` as the index.
850+
842851
.. ipython:: python
843852
844853
df = pd.DataFrame({'A': [1, 2, 3, 4]},
@@ -860,6 +869,20 @@ If you select a label *contained* within an interval, this will also select the
860869
df.loc[2.5]
861870
df.loc[[2.5, 3.5]]
862871
872+
``Interval`` and ``IntervalIndex`` are used by ``cut`` and ``qcut``:
873+
874+
.. ipython:: python
875+
876+
c = pd.cut(range(4), bins=2)
877+
c
878+
c.categories
879+
880+
Furthermore, ``IntervalIndex`` allows one to bin *other* data with these same
881+
bins, with ``NaN`` representing a missing value similar to other dtypes.
882+
883+
.. ipython:: python
884+
885+
pd.cut([0, 3, 5, 1], bins=c.categories)
863886
864887
Miscellaneous indexing FAQ
865888
--------------------------

doc/source/computation.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
np.set_printoptions(precision=4, suppress=True)
99
import pandas as pd
1010
import matplotlib
11-
matplotlib.style.use('ggplot')
11+
# matplotlib.style.use('default')
1212
import matplotlib.pyplot as plt
1313
plt.close('all')
1414
pd.options.display.max_rows=15

doc/source/cookbook.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
pd.options.display.max_rows=15
2121
2222
import matplotlib
23-
matplotlib.style.use('ggplot')
23+
# matplotlib.style.use('default')
2424
2525
np.set_printoptions(precision=4, suppress=True)
2626

doc/source/dsintro.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
pd.options.display.max_rows = 15
1111
1212
import matplotlib
13-
matplotlib.style.use('ggplot')
13+
# matplotlib.style.use('default')
1414
import matplotlib.pyplot as plt
1515
plt.close('all')
1616

doc/source/gotchas.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Frequently Asked Questions (FAQ)
1414
import pandas as pd
1515
pd.options.display.max_rows = 15
1616
import matplotlib
17-
matplotlib.style.use('ggplot')
17+
# matplotlib.style.use('default')
1818
import matplotlib.pyplot as plt
1919
plt.close('all')
2020

doc/source/groupby.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pandas as pd
1111
pd.options.display.max_rows = 15
1212
import matplotlib
13-
matplotlib.style.use('ggplot')
13+
# matplotlib.style.use('default')
1414
import matplotlib.pyplot as plt
1515
plt.close('all')
1616
from collections import OrderedDict

doc/source/io.rst

+10
Original file line numberDiff line numberDiff line change
@@ -1845,6 +1845,7 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
18451845
seconds, milliseconds, microseconds or nanoseconds respectively.
18461846
- ``lines`` : reads file as one json object per line.
18471847
- ``encoding`` : The encoding to use to decode py3 bytes.
1848+
- ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration.
18481849

18491850
The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable.
18501851

@@ -2049,6 +2050,10 @@ Line delimited json
20492050
pandas is able to read and write line-delimited json files that are common in data processing pipelines
20502051
using Hadoop or Spark.
20512052

2053+
.. versionadded:: 0.21.0
2054+
2055+
For line-delimited json files, pandas can also return an iterator which reads in ``chunksize`` lines at a time. This can be useful for large files or to read from a stream.
2056+
20522057
.. ipython:: python
20532058
20542059
jsonl = '''
@@ -2059,6 +2064,11 @@ using Hadoop or Spark.
20592064
df
20602065
df.to_json(orient='records', lines=True)
20612066
2067+
# reader is an iterator that returns `chunksize` lines each iteration
2068+
reader = pd.read_json(StringIO(jsonl), lines=True, chunksize=1)
2069+
reader
2070+
for chunk in reader:
2071+
print(chunk)
20622072
20632073
.. _io.table_schema:
20642074

doc/source/missing_data.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pandas as pd
88
pd.options.display.max_rows=15
99
import matplotlib
10-
matplotlib.style.use('ggplot')
10+
# matplotlib.style.use('default')
1111
import matplotlib.pyplot as plt
1212
1313
.. _missing_data:

0 commit comments

Comments
 (0)