Commit 0bc1374

ENH: dtype=... ufunc argument
The implementation is a bit simpler than numpy's: we do not have the notion of ufunc loop types (`np.add.types` etc.), so we simply cast the input tensors to `result_type(dtype, out.dtype)` and ask pytorch to do the computation in that dtype.
1 parent e532a07 commit 0bc1374
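
As a quick illustration of the rule described above (a sketch, not code from this commit; `torch.promote_types` stands in for this repo's `_dtypes_impl.result_type_impl`):

import torch

# How the computation dtype is chosen, per the commit message:
#   dtype=None, out=None  -> no forced dtype (usual type promotion applies)
#   dtype=None, out given -> out.dtype
#   dtype given, out=None -> dtype
#   both given            -> result_type(dtype, out.dtype)
assert torch.promote_types(torch.float32, torch.float64) == torch.float64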

File tree

4 files changed: +78 -90 lines changed


torch_np/_detail/_util.py

Lines changed: 1 addition & 47 deletions
@@ -137,7 +137,7 @@ def axis_none_ravel(*tensors, axis=None):
     return tensors, axis


-def cast_dont_broadcast(tensors, target_dtype, casting):
+def typecast_tensors(tensors, target_dtype, casting):
     """Dtype-cast tensors to target_dtype.

     Parameters
@@ -170,52 +170,6 @@ def cast_dont_broadcast(tensors, target_dtype, casting):
     return tuple(cast_tensors)


-def cast_and_broadcast(tensors, out_param, casting):
-    """
-    Parameters
-    ----------
-    tensors : iterable
-        tuple or list of torch.Tensors to broadcast/typecast
-    target_dtype : a torch.dtype object
-        The torch dtype to cast all tensors to
-    target_shape : tuple
-        The tensor shape to broadcast all `tensors` to
-    casting : str
-        The casting mode, see `np.can_cast`
-
-    Returns
-    -------
-    a tuple of torch.Tensors with dtype being the PyTorch counterpart
-    of the `target_dtype` and `target_shape`
-    """
-    if out_param is None:
-        return tensors
-
-    target_dtype, target_shape = out_param
-
-    can_cast = _dtypes_impl.can_cast_impl
-
-    processed_tensors = []
-    for tensor in tensors:
-        # check dtypes of x and out
-        if not can_cast(tensor.dtype, target_dtype, casting=casting):
-            raise TypeError(
-                f"Cannot cast array data from {tensor.dtype} to"
-                f" {target_dtype} according to the rule '{casting}'"
-            )
-
-        # cast arr if needed
-        tensor = cast_if_needed(tensor, target_dtype)
-
-        # `out` broadcasts `tensor`
-        if tensor.shape != target_shape:
-            tensor = torch.broadcast_to(tensor, target_shape)
-
-        processed_tensors.append(tensor)
-
-    return tuple(processed_tensors)
-
-
 def axis_expand_func(func, tensor, axis, *args, **kwds):
     """Generically handle axis arguments in reductions."""
     if axis is not None:
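
The body of typecast_tensors is not shown in this hunk; judging from its docstring and from the casting loop visible in the deleted cast_and_broadcast, it plausibly reduces to a cast-only loop along these lines (a hedged sketch: np.can_cast stands in for the repo's can_cast_impl, and _np_dtype is a hypothetical torch-to-numpy dtype bridge):

import numpy as np
import torch

def _np_dtype(torch_dtype):
    # hypothetical bridge for the sketch: torch.float32 -> np.dtype("float32")
    return np.dtype(str(torch_dtype).removeprefix("torch."))

def typecast_tensors_sketch(tensors, target_dtype, casting):
    cast_tensors = []
    for tensor in tensors:
        # reject casts that the numpy casting rule forbids
        if not np.can_cast(_np_dtype(tensor.dtype), _np_dtype(target_dtype),
                           casting=casting):
            raise TypeError(
                f"Cannot cast array data from {tensor.dtype} to"
                f" {target_dtype} according to the rule '{casting}'"
            )
        # cast only; broadcasting now happens in the caller
        cast_tensors.append(tensor.to(target_dtype))
    return tuple(cast_tensors)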

torch_np/_detail/implementations.py

Lines changed: 2 additions & 2 deletions
@@ -308,7 +308,7 @@ def _concat_cast_helper(tensors, out=None, dtype=None, casting="same_kind"):
         out_dtype = _dtypes_impl.result_type_impl([t.dtype for t in tensors])

     # cast input arrays if necessary; do not broadcast them agains `out`
-    tensors = _util.cast_dont_broadcast(tensors, out_dtype, casting)
+    tensors = _util.typecast_tensors(tensors, out_dtype, casting)

     return tensors

@@ -497,7 +497,7 @@ def bincount(x, /, weights=None, minlength=0):
         x = x.new_empty(0, dtype=int)

     int_dtype = _dtypes_impl.default_int_dtype
-    (x,) = _util.cast_dont_broadcast((x,), int_dtype, casting="safe")
+    (x,) = _util.typecast_tensors((x,), int_dtype, casting="safe")

     result = torch.bincount(x, weights, minlength)
     return result
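
A note on the casting="safe" argument above: under numpy's casting rules, "safe" admits widening integer casts but rejects float-to-integer ones, so bincount on float input raises TypeError rather than silently truncating. For instance:

import numpy as np

assert np.can_cast(np.int32, np.int64, casting="safe")
assert not np.can_cast(np.float64, np.int64, casting="safe")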

torch_np/_helpers.py

Lines changed: 22 additions & 41 deletions
@@ -1,57 +1,38 @@
 import torch

-from . import _dtypes
-from ._detail import _util
-
-
-def cast_and_broadcast(tensors, out, casting):
-    """Cast dtypes of arrays to out.dtype and broadcast if needed.
-
-    Parameters
-    ----------
-    arrays : sequence of arrays
-        Each element is broadcast against `out` and typecast to out.dtype
-    out : the "output" array
-        Not modified.
-    casting : str
-        One of numpy casting modes
-
-    Returns
-    -------
-    tensors : tuple of Tensors
-        Each tensor is dtype-cast and broadcast agains `out`, as needed
-
-    Notes
-    -----
-    The `out` arrays broadcasts and dtype-casts `arrays`, but not vice versa.
-
-    """
-    if out is None:
-        return tensors
-    else:
-        tensors = _util.cast_and_broadcast(
-            tensors, out.dtype.type.torch_dtype, out.shape, casting
-        )
-
-        return tuple(tensors)
+from ._detail import _dtypes_impl, _util


 def ufunc_preprocess(
     tensors, out, where, casting, order, dtype, subok, signature, extobj
 ):
+    """
+    Notes
+    -----
+    The `out` array broadcasts `tensors`, but not vice versa.
+    """
     # internal preprocessing or args in ufuncs (cf _unary_ufuncs, _binary_ufuncs)
     if order != "K" or not where or signature or extobj:
         raise NotImplementedError

-    # XXX: dtype=... parameter
-    if dtype is not None:
-        raise NotImplementedError
+    # dtype of the result: depends on both dtype=... and out=... arguments
+    if dtype is None:
+        out_dtype = None if out is None else out.dtype.torch_dtype
+    else:
+        out_dtype = (
+            dtype
+            if out is None
+            else _dtypes_impl.result_type_impl([dtype, out.dtype.torch_dtype])
+        )

-    out_shape_dtype = None
-    if out is not None:
-        out_shape_dtype = (out.get().dtype, out.get().shape)
+    if out_dtype:
+        tensors = _util.typecast_tensors(tensors, out_dtype, casting)

-    tensors = _util.cast_and_broadcast(tensors, out_shape_dtype, casting)
+    # now broadcast input tensors against the out=... array
+    if out is not None:
+        # XXX: need to filter out noop broadcasts if t.shape == out.shape?
+        shape = out.shape
+        tensors = tuple(torch.broadcast_to(t, shape) for t in tensors)

     return tensors
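
Taken together, the new ufunc_preprocess makes two passes over the inputs: typecast to the resolved dtype, then broadcast against out. A rough end-to-end sketch of that flow (an illustration under stated assumptions, not the committed code; plain torch calls stand in for the repo's wrappers, and preprocess_sketch is a hypothetical name):

import torch

def preprocess_sketch(tensors, out, dtype):
    # resolve the computation dtype, mirroring the branch in the diff above
    if dtype is None:
        out_dtype = None if out is None else out.dtype
    else:
        out_dtype = dtype if out is None else torch.promote_types(dtype, out.dtype)

    # pass 1: dtype-cast only (the committed code routes through
    # _util.typecast_tensors, which also enforces the casting rule)
    if out_dtype is not None:
        tensors = tuple(t.to(out_dtype) for t in tensors)

    # pass 2: broadcast every input against the shape of the out= array
    if out is not None:
        tensors = tuple(torch.broadcast_to(t, out.shape) for t in tensors)
    return tensors

out = torch.empty(2, 3, dtype=torch.float64)
a, b = torch.ones(3, dtype=torch.float32), torch.ones(2, 1)
a2, b2 = preprocess_sketch((a, b), out, dtype=torch.float32)
assert a2.dtype == b2.dtype == torch.float64 and a2.shape == (2, 3)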

torch_np/tests/test_ufuncs_basic.py

Lines changed: 53 additions & 0 deletions
@@ -371,3 +371,56 @@ def test_other_array_bcast(self, ufunc, op, iop):
         if result_op.dtype != result_ufunc.dtype:
             pytest.xfail(reason="prob need weak type promotion (scalars)")
         assert result_op.dtype == result_ufunc.dtype
+
+
+class TestUfuncDtypeKwd:
+    def test_binary_ufunc_dtype(self):
+
+        # default computation uses float64:
+        r64 = np.add(1, 1e-15)
+        assert r64.dtype == "float64"
+        assert r64 - 1 > 0
+
+        # force the float32 dtype: loss of precision
+        r32 = np.add(1, 1e-15, dtype="float32")
+        assert r32.dtype == "float32"
+        assert r32 == 1
+
+        # casting of floating inputs to booleans
+        with assert_raises(TypeError):
+            np.add(1.0, 1e-15, dtype=bool)
+
+        # now force the cast
+        rb = np.add(1.0, 1e-15, dtype=bool, casting="unsafe")
+        assert rb.dtype == bool
+
+    def test_binary_ufunc_dtype_and_out(self):
+
+        # all in float64: no precision loss
+        out64 = np.empty(2, dtype=np.float64)
+        r64 = np.add([1.0, 2.0], 1.0e-15, out=out64)
+
+        assert (r64 != [1.0, 2.0]).all()
+        assert r64.dtype == np.float64
+
+        # all in float32: loss of precision, result is float32
+        out32 = np.empty(2, dtype=np.float32)
+        r32 = np.add([1.0, 2.0], 1.0e-15, dtype=np.float32, out=out32)
+        assert (r32 == [1, 2]).all()
+        assert r32.dtype == np.float32
+
+        # NB: this test differs from numpy: in numpy, r.dtype is float64
+        # but the precision is lost, r == [1, 2].
+        # I *guess* numpy casts inputs to the dtype=... value, performs calculations,
+        # and then casts the result back to out.dtype.
+        out64 = np.empty(2, dtype=np.float64)
+        r = np.add([1.0, 2.0], 1.0e-15, dtype=np.float32, out=out64)
+        assert (r != [1, 2]).all()
+        assert r.dtype == np.float64
+
+        # Internal computations are in float64, but the final cast to out.dtype
+        # truncates the precision => precision loss.
+        out32 = np.empty(2, dtype=np.float32)
+        r = np.add([1.0, 2.0], 1.0e-15, dtype=np.float64, out=out32)
+        assert (r == [1, 2]).all()
+        assert r.dtype == np.float32
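
Why these tests probe with 1e-15: it sits between the machine epsilons of the two float types, so 1 + 1e-15 survives rounding in float64 but collapses back to 1.0 in float32. A standalone check of that arithmetic:

import numpy as np

# 1e-15 lies between float64 eps (~2.2e-16) and float32 eps (~1.2e-7)
assert np.finfo(np.float64).eps < 1e-15 < np.finfo(np.float32).eps
assert np.float64(1) + np.float64(1e-15) > 1    # representable in float64
assert np.float32(1) + np.float32(1e-15) == 1   # rounds away in float32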
