Commit 48c8189

Remove RandomVariable dtype input
1 parent 04ec126 commit 48c8189

6 files changed, +118 -122 lines changed


pytensor/link/jax/dispatch/random.py

Lines changed: 2 additions & 2 deletions
@@ -114,15 +114,15 @@ def jax_funcify_RandomVariable(op: ptr.RandomVariable, node, **kwargs):
     if None in static_size:
         assert_size_argument_jax_compatible(node)
 
-        def sample_fn(rng, size, dtype, *parameters):
+        def sample_fn(rng, size, *parameters):
             # PyTensor uses empty size to represent size = None
             if jax.numpy.asarray(size).shape == (0,):
                 size = None
             return jax_sample_fn(op, node=node)(rng, size, out_dtype, *parameters)
 
     else:
 
-        def sample_fn(rng, size, dtype, *parameters):
+        def sample_fn(rng, size, *parameters):
             return jax_sample_fn(op, node=node)(
                 rng, static_size, out_dtype, *parameters
             )
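
In the JAX backend the jitted sampler no longer receives a runtime dtype argument; the output dtype (out_dtype) is captured when the sampler is built. A minimal, self-contained sketch of that closure pattern, where make_sampler and the normal draw are illustrative stand-ins rather than the dispatcher's actual code:

    import jax

    def make_sampler(out_dtype):
        # out_dtype is fixed at build time instead of being passed on every call
        def sample_fn(rng, size, *parameters):
            loc, scale = parameters
            return (jax.random.normal(rng, shape=size) * scale + loc).astype(out_dtype)
        return sample_fn

    sample_fn = make_sampler("float32")
    print(sample_fn(jax.random.PRNGKey(0), (3,), 0.0, 1.0).dtype)  # float32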

pytensor/link/numba/dispatch/random.py

Lines changed: 8 additions & 9 deletions
@@ -123,7 +123,6 @@ def make_numba_random_fn(node, np_random_func):
             "size_dims",
             "rng",
             "size",
-            "dtype",
         ],
         suffix_sep="_",
     )
@@ -146,7 +145,7 @@ def {bcast_fn_name}({bcast_fn_input_names}):
     )
 
     random_fn_input_names = ", ".join(
-        ["rng", "size", "dtype"] + [unique_names(i) for i in dist_params]
+        ["rng", "size"] + [unique_names(i) for i in dist_params]
     )
 
     # Now, create a Numba JITable function that implements the `size` parameter
@@ -243,7 +242,7 @@ def create_numba_random_fn(
     np_global_env["numba_vectorize"] = numba_basic.numba_vectorize
 
     unique_names = unique_name_generator(
-        [np_random_fn_name, *np_global_env.keys(), "rng", "size", "dtype"],
+        [np_random_fn_name, *np_global_env.keys(), "rng", "size"],
         suffix_sep="_",
     )
 
@@ -310,7 +309,7 @@ def numba_funcify_CategoricalRV(op: ptr.CategoricalRV, node, **kwargs):
     p_ndim = node.inputs[-1].ndim
 
     @numba_basic.numba_njit
-    def categorical_rv(rng, size, dtype, p):
+    def categorical_rv(rng, size, p):
         if not size_len:
             size_tpl = p.shape[:-1]
         else:
@@ -342,7 +341,7 @@ def numba_funcify_DirichletRV(op, node, **kwargs):
     if alphas_ndim > 1:
 
         @numba_basic.numba_njit
-        def dirichlet_rv(rng, size, dtype, alphas):
+        def dirichlet_rv(rng, size, alphas):
             if size_len > 0:
                 size_tpl = numba_ndarray.to_fixed_tuple(size, size_len)
                 if (
@@ -365,7 +364,7 @@ def dirichlet_rv(rng, size, dtype, alphas):
     else:
 
         @numba_basic.numba_njit
-        def dirichlet_rv(rng, size, dtype, alphas):
+        def dirichlet_rv(rng, size, alphas):
             size = numba_ndarray.to_fixed_tuple(size, size_len)
             return (rng, np.random.dirichlet(alphas, size))
 
@@ -388,14 +387,14 @@ def numba_funcify_choice_without_replacement(op, node, **kwargs):
     if op.has_p_param:
 
         @numba_basic.numba_njit
-        def choice_without_replacement_rv(rng, size, dtype, a, p, core_shape):
+        def choice_without_replacement_rv(rng, size, a, p, core_shape):
             core_shape = numba_ndarray.to_fixed_tuple(core_shape, core_shape_len)
             samples = np.random.choice(a, size=core_shape, replace=False, p=p)
             return (rng, samples)
     else:
 
         @numba_basic.numba_njit
-        def choice_without_replacement_rv(rng, size, dtype, a, core_shape):
+        def choice_without_replacement_rv(rng, size, a, core_shape):
             core_shape = numba_ndarray.to_fixed_tuple(core_shape, core_shape_len)
             samples = np.random.choice(a, size=core_shape, replace=False)
             return (rng, samples)
@@ -411,7 +410,7 @@ def numba_funcify_permutation(op: ptr.PermutationRV, node, **kwargs):
     x_batch_ndim = node.inputs[-1].type.ndim - op.ndims_params[0]
 
     @numba_basic.numba_njit
-    def permutation_rv(rng, size, dtype, x):
+    def permutation_rv(rng, size, x):
         if batch_ndim:
             x_core_shape = x.shape[x_batch_ndim:]
             if size_is_none:

pytensor/tensor/random/op.py

Lines changed: 35 additions & 33 deletions
@@ -27,7 +27,7 @@
     normalize_size_param,
 )
 from pytensor.tensor.shape import shape_tuple
-from pytensor.tensor.type import TensorType, all_dtypes
+from pytensor.tensor.type import TensorType
 from pytensor.tensor.type_other import NoneConst
 from pytensor.tensor.utils import _parse_gufunc_signature, safe_signature
 from pytensor.tensor.variable import TensorVariable
@@ -65,7 +65,7 @@ def __init__(
         signature: str
             Numpy-like vectorized signature of the random variable.
         dtype: str (optional)
-            The dtype of the sampled output. If the value ``"floatX"`` is
+            The default dtype of the sampled output. If the value ``"floatX"`` is
             given, then ``dtype`` is set to ``pytensor.config.floatX``. If
             ``None`` (the default), the `dtype` keyword must be set when
             `RandomVariable.make_node` is called.
@@ -287,8 +287,8 @@ def extract_batch_shape(p, ps, n):
         return shape
 
     def infer_shape(self, fgraph, node, input_shapes):
-        _, size, _, *dist_params = node.inputs
-        _, size_shape, _, *param_shapes = input_shapes
+        _, size, *dist_params = node.inputs
+        _, size_shape, *param_shapes = input_shapes
 
         try:
             size_len = get_vector_length(size)
@@ -302,14 +302,34 @@ def infer_shape(self, fgraph, node, input_shapes):
         return [None, list(shape)]
 
     def __call__(self, *args, size=None, name=None, rng=None, dtype=None, **kwargs):
-        res = super().__call__(rng, size, dtype, *args, **kwargs)
+        if dtype is None:
+            dtype = self.dtype
+        if dtype == "floatX":
+            dtype = config.floatX
+
+        # We need to recreate the Op with the right dtype
+        if dtype != self.dtype:
+            # Check we are not switching from float to int
+            if self.dtype is not None:
+                if dtype.startswith("float") != self.dtype.startswith("float"):
+                    raise ValueError(
+                        f"Cannot change the dtype of a {self.name} RV from {self.dtype} to {dtype}"
+                    )
+            props = self._props_dict()
+            props["dtype"] = dtype
+            new_op = type(self)(**props)
+            return new_op.__call__(
+                *args, size=size, name=name, rng=rng, dtype=dtype, **kwargs
+            )
+
+        res = super().__call__(rng, size, *args, **kwargs)
 
         if name is not None:
             res.name = name
 
         return res
 
-    def make_node(self, rng, size, dtype, *dist_params):
+    def make_node(self, rng, size, *dist_params):
         """Create a random variable node.
 
         Parameters
@@ -349,23 +369,10 @@ def make_node(self, rng, size, dtype, *dist_params):
 
         shape = self._infer_shape(size, dist_params)
         _, static_shape = infer_static_shape(shape)
-        dtype = self.dtype or dtype
 
-        if dtype == "floatX":
-            dtype = config.floatX
-        elif dtype is None or (isinstance(dtype, str) and dtype not in all_dtypes):
-            raise TypeError("dtype is unspecified")
-
-        if isinstance(dtype, str):
-            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
-        else:
-            dtype_idx = constant(dtype, dtype="int64")
-
-        dtype = all_dtypes[dtype_idx.data]
-
-        inputs = (rng, size, dtype_idx, *dist_params)
-        out_var = TensorType(dtype=dtype, shape=static_shape)()
-        outputs = (rng.type(), out_var)
+        inputs = (rng, size, *dist_params)
+        out_type = TensorType(dtype=self.dtype, shape=static_shape)
+        outputs = (rng.type(), out_type())
 
         return Apply(self, inputs, outputs)
 
@@ -382,14 +389,12 @@ def size_param(self, node) -> Variable:
 
     def dist_params(self, node) -> Sequence[Variable]:
         """Return the node inpust corresponding to dist params"""
-        return node.inputs[3:]
+        return node.inputs[2:]
 
     def perform(self, node, inputs, outputs):
         rng_var_out, smpl_out = outputs
 
-        rng, size, dtype, *args = inputs
-
-        out_var = node.outputs[1]
+        rng, size, *args = inputs
 
         # If `size == []`, that means no size is enforced, and NumPy is trusted
         # to draw the appropriate number of samples, NumPy uses `size=None` to
@@ -408,11 +413,8 @@ def perform(self, node, inputs, outputs):
 
         smpl_val = self.rng_fn(rng, *([*args, size]))
 
-        if (
-            not isinstance(smpl_val, np.ndarray)
-            or str(smpl_val.dtype) != out_var.type.dtype
-        ):
-            smpl_val = _asarray(smpl_val, dtype=out_var.type.dtype)
+        if not isinstance(smpl_val, np.ndarray) or str(smpl_val.dtype) != self.dtype:
+            smpl_val = _asarray(smpl_val, dtype=self.dtype)
 
         smpl_out[0] = smpl_val
 
@@ -463,7 +465,7 @@ class DefaultGeneratorMakerOp(AbstractRNGConstructor):
 
 @_vectorize_node.register(RandomVariable)
 def vectorize_random_variable(
-    op: RandomVariable, node: Apply, rng, size, dtype, *dist_params
+    op: RandomVariable, node: Apply, rng, size, *dist_params
 ) -> Apply:
     # If size was provided originally and a new size hasn't been provided,
     # We extend it to accommodate the new input batch dimensions.
@@ -491,4 +493,4 @@ def vectorize_random_variable(
         new_size_dims = broadcasted_batch_shape[:new_ndim]
         size = concatenate([new_size_dims, size])
 
-    return op.make_node(rng, size, dtype, *dist_params)
+    return op.make_node(rng, size, *dist_params)
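
The core of the change is in RandomVariable itself: dtype is no longer a symbolic input of the Apply node, it lives only on the Op, and asking for a different dtype at call time recreates the Op with that property. A hedged sketch of how the new call path is expected to behave, where the 4-input count assumes the standard normal RV with loc/scale parameters and is not part of the diff:

    from pytensor.tensor.random.basic import normal

    x = normal(0, 1)
    assert len(x.owner.inputs) == 4            # rng, size, loc, scale -- no dtype input
    assert x.owner.op.dtype == x.type.dtype    # the output dtype comes from the Op

    y = normal(0, 1, dtype="float32")          # a different float dtype rebuilds the Op
    assert y.owner.op.dtype == "float32"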

pytensor/tensor/random/rewriting/basic.py

Lines changed: 6 additions & 6 deletions
@@ -81,7 +81,7 @@ def local_rv_size_lift(fgraph, node):
     if not isinstance(node.op, RandomVariable):
         return
 
-    rng, size, dtype, *dist_params = node.inputs
+    rng, size, *dist_params = node.inputs
 
     dist_params = broadcast_params(dist_params, node.op.ndims_params)
 
@@ -105,7 +105,7 @@ def local_rv_size_lift(fgraph, node):
     else:
         return
 
-    new_node = node.op.make_node(rng, None, dtype, *dist_params)
+    new_node = node.op.make_node(rng, None, *dist_params)
 
     if config.compute_test_value != "off":
         compute_test_value(new_node)
@@ -141,7 +141,7 @@ def local_dimshuffle_rv_lift(fgraph, node):
         return False
 
     rv_op = rv_node.op
-    rng, size, dtype, *dist_params = rv_node.inputs
+    rng, size, *dist_params = rv_node.inputs
     rv = rv_node.default_output()
 
     # Check that Dimshuffle does not affect support dims
@@ -185,7 +185,7 @@ def local_dimshuffle_rv_lift(fgraph, node):
         )
         new_dist_params.append(param.dimshuffle(param_new_order))
 
-    new_node = rv_op.make_node(rng, new_size, dtype, *new_dist_params)
+    new_node = rv_op.make_node(rng, new_size, *new_dist_params)
 
     if config.compute_test_value != "off":
         compute_test_value(new_node)
@@ -233,7 +233,7 @@ def is_nd_advanced_idx(idx, dtype):
         return None
 
     rv_op = rv_node.op
-    rng, size, dtype, *dist_params = rv_node.inputs
+    rng, size, *dist_params = rv_node.inputs
 
     # Parse indices
     idx_list = getattr(subtensor_op, "idx_list", None)
@@ -346,7 +346,7 @@ def is_nd_advanced_idx(idx, dtype):
         new_dist_params.append(batch_param[tuple(batch_indices)])
 
     # Create new RV
-    new_node = rv_op.make_node(rng, new_size, dtype, *new_dist_params)
+    new_node = rv_op.make_node(rng, new_size, *new_dist_params)
     new_rv = new_node.default_output()
 
     copy_stack_trace(rv, new_rv)
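
For the rewrites the change is purely positional: node inputs now unpack as (rng, size, *dist_params) and nodes are rebuilt through make_node with the same layout. A small sketch of that rebuild pattern, not taken from the diff:

    from pytensor.tensor.random.basic import normal

    x = normal(0, 1, size=(2, 3))
    rng, size, *dist_params = x.owner.inputs              # no dtype slot to skip anymore
    rebuilt = x.owner.op.make_node(rng, size, *dist_params)
    assert rebuilt.outputs[1].type.dtype == x.type.dtype  # dtype is taken from the Op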

tests/tensor/random/rewriting/test_basic.py

Lines changed: 6 additions & 5 deletions
@@ -12,6 +12,7 @@
 from pytensor.tensor import constant
 from pytensor.tensor.elemwise import DimShuffle
 from pytensor.tensor.random.basic import (
+    NormalRV,
     categorical,
     dirichlet,
     multinomial,
@@ -397,7 +398,7 @@ def test_DimShuffle_lift(ds_order, lifted, dist_op, dist_params, size, rtol):
     )
 
     if lifted:
-        assert new_out.owner.op == dist_op
+        assert isinstance(new_out.owner.op, type(dist_op))
         assert all(
             isinstance(i.owner.op, DimShuffle)
             for i in new_out.owner.op.dist_params(new_out.owner)
@@ -832,7 +833,7 @@ def test_Subtensor_lift_restrictions():
     subtensor_node = fg.outputs[0].owner.inputs[1].owner.inputs[0].owner
     assert subtensor_node == y.owner
     assert isinstance(subtensor_node.op, Subtensor)
-    assert subtensor_node.inputs[0].owner.op == normal
+    assert isinstance(subtensor_node.inputs[0].owner.op, NormalRV)
 
     z = pt.ones(x.shape) - x[1]
 
@@ -850,7 +851,7 @@ def test_Subtensor_lift_restrictions():
     EquilibriumGraphRewriter([local_subtensor_rv_lift], max_use_ratio=100).apply(fg)
 
     rv_node = fg.outputs[0].owner.inputs[1].owner.inputs[0].owner
-    assert rv_node.op == normal
+    assert isinstance(rv_node.op, NormalRV)
     assert isinstance(rv_node.inputs[-1].owner.op, Subtensor)
     assert isinstance(rv_node.inputs[-2].owner.op, Subtensor)
 
@@ -872,7 +873,7 @@ def test_Dimshuffle_lift_restrictions():
     dimshuffle_node = fg.outputs[0].owner.inputs[1].owner
     assert dimshuffle_node == y.owner
     assert isinstance(dimshuffle_node.op, DimShuffle)
-    assert dimshuffle_node.inputs[0].owner.op == normal
+    assert isinstance(dimshuffle_node.inputs[0].owner.op, NormalRV)
 
     z = pt.ones(x.shape) - y
 
@@ -890,7 +891,7 @@ def test_Dimshuffle_lift_restrictions():
     EquilibriumGraphRewriter([local_dimshuffle_rv_lift], max_use_ratio=100).apply(fg)
 
     rv_node = fg.outputs[0].owner.inputs[1].owner
-    assert rv_node.op == normal
+    assert isinstance(rv_node.op, NormalRV)
     assert isinstance(rv_node.inputs[-1].owner.op, DimShuffle)
     assert isinstance(rv_node.inputs[-2].owner.op, DimShuffle)
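
The test updates follow from the Op being recreated whenever a concrete dtype is set: a node's op is no longer guaranteed to compare equal to the module-level `normal` instance, so equality checks give way to isinstance checks. A hedged illustration, not taken from the test file:

    from pytensor.tensor.random.basic import NormalRV, normal

    x = normal(0, 1)
    assert isinstance(x.owner.op, NormalRV)  # robust to the Op being recreated
    # `x.owner.op == normal` may fail: __call__ can return a node built by a new
    # NormalRV whose dtype prop differs from the default instance's.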
