Add atop(concatenate=False) keyword argument (dask#1609)

mrocklin · web-flow · commit 3862b4db18ce · 2016-10-04T11:37:24.000-04:00
This allows atop to pre-concatenate arrays before sending them to the
user-defined function.
diff --git a/dask/array/core.py b/dask/array/core.py
@@ -260,17 +260,17 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
 
     Applies a function, ``func``, across blocks from many different input
     dasks.  We arrange the pattern with which those blocks interact with sets
-    of matching indices.  E.g.
+    of matching indices.  E.g.::
 
-        ``top(func, 'z', 'i', 'x', 'i', 'y', 'i')``
+        top(func, 'z', 'i', 'x', 'i', 'y', 'i')
 
     yield an embarrassingly parallel communication pattern and is read as
 
         $$ z_i = func(x_i, y_i) $$
 
-    More complex patterns may emerge, including multiple indices
+    More complex patterns may emerge, including multiple indices::
 
-        ``top(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')``
+        top(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')
 
         $$ z_{ij} = func(x_{ij}, y_{ji}) $$
 
@@ -324,6 +324,15 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
      ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                             [('y', 0, 1), ('y', 1, 1)])}
 
+    Pass ``concatenate=True`` to concatenate arrays ahead of time
+
+    >>> top(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
+    ...     numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
+    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
+                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,)))
+     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
+                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}
+
     Supports Broadcasting rules
 
     >>> top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
@@ -336,11 +345,16 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
     Support keyword arguments with apply
 
     >>> def f(a, b=0): return a + b
-    >>> top(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,), b=10})  # doctest: +SKIP
+    >>> top(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
     {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
      ('z', 1): (apply, f, [('x', 1)], {'b': 10})}
+
+    See Also
+    --------
+    atop
     """
     numblocks = kwargs.pop('numblocks')
+    concatenate = kwargs.pop('concatenate', None)
     argpairs = list(partition(2, arrind_pairs))
 
     assert set(numblocks) == set(pluck(0, argpairs))
@@ -366,6 +380,9 @@ def top(func, output, out_indices, *arrind_pairs, **kwargs):
         for arg, ind in argpairs:
             tups = lol_tuples((arg,), ind, kd, dummies)
             tups2 = zero_broadcast_dimensions(tups, numblocks[arg])
+            if concatenate and isinstance(tups2, list):
+                axes = [n for n, i in enumerate(ind) if i in dummies]
+                tups2 = (concatenate_axes, tups2, axes)
             args.append(tups2)
         valtups.append(tuple(args))
 
@@ -1731,6 +1748,8 @@ def atop(func, out_ind, *args, **kwargs):
         Function to apply to individual tuples of blocks
     out_ind: iterable
         Block pattern of the output, something like 'ijk' or (1, 2, 3)
+    concatenate: bool
+        If True, concatenate arrays along dummy indices; else provide lists
     *args: sequence of Array, index pairs
         Sequence like (x, 'ij', y, 'jk', z, 'i')
     **kwargs: dict
@@ -1767,8 +1786,9 @@ def atop(func, out_ind, *args, **kwargs):
     Any index, like ``i`` missing from the output index is interpreted as a
     contraction (note that this differs from Einstein convention; repeated
     indices do not imply contraction.)  In the case of a contraction the passed
-    function should expect an iterator of blocks on any array that holds that
-    index.
+    function should expect an iterable of blocks on any array that holds that
+    index.  To receive arrays concatenated along contracted dimensions instead,
+    pass ``concatenate=True``.
 
     Inner product multiplying x by y, two 1-d vectors
 
@@ -2101,10 +2121,10 @@ def tensordot(lhs, rhs, axes=2):
         out_index.remove(right_index[r])
         right_index[r] = left_index[l]
 
-    func = partial(np.tensordot, axes=(left_axes, right_axes))
-    intermediate = atop(func, out_index,
+    intermediate = atop(np.tensordot, out_index,
                         lhs, left_index,
-                        rhs, right_index, dtype=dt)
+                        rhs, right_index, dtype=dt,
+                        axes=(left_axes, right_axes))
 
     int_index = list(out_index)
     for l in left_axes:
@@ -3160,6 +3180,19 @@ def dtype(x):
     return result
 
 
+def concatenate_axes(arrays, axes):
+    """ Recursively call np.concatenate along axes
+
+    TODO: This performs many copies.  We should be able to do this in one copy
+    TODO: Merge logic on concatenate3 with this
+    """
+    if len(axes) != ndimlist(arrays):
+        raise ValueError("Length of axes should equal depth of nested arrays")
+    if len(axes) > 1:
+        arrays = [concatenate_axes(a, axes[1:]) for a in arrays]
+    return np.concatenate(arrays, axis=axes[0])
+
+
 def to_hdf5(filename, *args, **kwargs):
     """ Store arrays in HDF5 file
 
diff --git a/dask/array/reductions.py b/dask/array/reductions.py
@@ -36,7 +36,7 @@ def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None,
 
     # Map chunk across all blocks
     inds = tuple(range(x.ndim))
-    tmp = atop(partial(chunk, axis=axis, keepdims=True), inds, x, inds)
+    tmp = atop(chunk, inds, x, inds, axis=axis, keepdims=True)
     tmp._chunks = tuple((1, ) * len(c) if i in axis else c for (i, c)
                         in enumerate(tmp.chunks))
 
diff --git a/dask/array/tests/test_array_core.py b/dask/array/tests/test_array_core.py
@@ -29,7 +29,7 @@
                              broadcast_to, reshape, fromfunction,
                              blockdims_from_blockshape, store, optimize,
                              from_func, normalize_chunks, broadcast_chunks,
-                             atop, from_delayed)
+                             atop, from_delayed, concatenate_axes)
 from dask.array.utils import assert_eq
 
 # temporary until numpy functions migrated
@@ -2119,3 +2119,60 @@ def test_from_array_raises_on_bad_chunks():
 
     with pytest.raises(ValueError):
         da.from_array(x, chunks=((5, 5, 5),))
+
+
+def test_concatenate_axes():
+    x = np.ones((2, 2, 2))
+
+    assert_eq(concatenate_axes([x, x], axes=[0]),
+              np.ones((4, 2, 2)))
+    assert_eq(concatenate_axes([x, x, x], axes=[0]),
+              np.ones((6, 2, 2)))
+    assert_eq(concatenate_axes([x, x], axes=[1]),
+              np.ones((2, 4, 2)))
+    assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 1]),
+              np.ones((4, 4, 2)))
+    assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 2]),
+              np.ones((4, 2, 4)))
+    assert_eq(concatenate_axes([[x, x, x], [x, x, x]], axes=[1, 2]),
+              np.ones((2, 4, 6)))
+
+    with pytest.raises(ValueError):
+        concatenate_axes([[x, x], [x, x]], axes=[0])  # not all nested lists accounted for
+    with pytest.raises(ValueError):
+        concatenate_axes([x, x], axes=[0, 1, 2, 3])  # too many axes
+
+
+def test_atop_concatenate():
+    x = da.ones((4, 4, 4), chunks=(2, 2, 2))
+    y = da.ones((4, 4), chunks=(2, 2))
+
+    def f(a, b):
+        assert isinstance(a, np.ndarray)
+        assert isinstance(b, np.ndarray)
+
+        assert a.shape == (2, 4, 4)
+        assert b.shape == (4, 4)
+
+        return (a + b).sum(axis=(1, 2))
+
+    z = atop(f, 'i', x, 'ijk', y, 'jk', concatenate=True)
+    assert_eq(z, np.ones(4) * 32)
+
+    z = atop(add, 'ij', y, 'ij', y, 'ij', concatenate=True)
+    assert_eq(z, np.ones((4, 4)) * 2)
+
+
+    def f(a, b, c):
+        assert isinstance(a, np.ndarray)
+        assert isinstance(b, np.ndarray)
+        assert isinstance(c, np.ndarray)
+
+        assert a.shape == (4, 2, 4)
+        assert b.shape == (4, 4)
+        assert c.shape == (4, 2)
+
+        return np.ones(5)
+
+    z = atop(f, 'j', x, 'ijk', y, 'ki', y, 'ij', concatenate=True)
+    assert_eq(z, np.ones(10))