
Commit fb8fd2f

Last PR sampling working
Working
1 parent 597f84e commit fb8fd2f

File tree

3 files changed: +87 −63 lines changed


pytensor/link/mlx/dispatch/core.py

Lines changed: 54 additions & 6 deletions
@@ -127,21 +127,64 @@ def extract_diag(x, offset=offset, axis1=axis1, axis2=axis2):
 # ------------------------------------------------------------------
 @mlx_funcify.register(Eye)  # MLX
 def mlx_funcify_Eye(op, **kwargs):
-    dtype = op.dtype
+    dtype = convert_dtype_to_mlx(op.dtype)
 
     def eye(N, M, k):
         return mx.eye(int(N), int(M), int(k), dtype=dtype)  # MLX
 
     return eye
 
 
+def convert_dtype_to_mlx(dtype_str):
+    """Convert PyTensor dtype strings to MLX dtype objects.
+
+    MLX expects dtype objects rather than string literals for type conversion.
+    This function maps common dtype strings to their MLX equivalents.
+    """
+    if isinstance(dtype_str, str):
+        if dtype_str == "bool":
+            return mx.bool_
+        elif dtype_str == "int8":
+            return mx.int8
+        elif dtype_str == "int16":
+            return mx.int16
+        elif dtype_str == "int32":
+            return mx.int32
+        elif dtype_str == "int64":
+            return mx.int64
+        elif dtype_str == "uint8":
+            return mx.uint8
+        elif dtype_str == "uint16":
+            return mx.uint16
+        elif dtype_str == "uint32":
+            return mx.uint32
+        elif dtype_str == "uint64":
+            return mx.uint64
+        elif dtype_str == "float16":
+            return mx.float16
+        elif dtype_str == "float32":
+            return mx.float32
+        elif dtype_str == "float64":
+            return mx.float64
+        elif dtype_str == "bfloat16":
+            return mx.bfloat16
+        elif dtype_str == "complex64":
+            return mx.complex64
+        elif dtype_str == "complex128":
+            return mx.complex128
+    # Return as is if it's already an MLX dtype or not a recognized string
+    return dtype_str
+
+
 # ------------------------------------------------------------------
 # MakeVector
 # ------------------------------------------------------------------
 @mlx_funcify.register(MakeVector)  # MLX
 def mlx_funcify_MakeVector(op, **kwargs):
+    dtype = convert_dtype_to_mlx(op.dtype)
+
     def makevector(*x):
-        return mx.array(x, dtype=op.dtype)  # MLX
+        return mx.array(x, dtype=dtype)  # MLX
 
     return makevector
 
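For reference, the new convert_dtype_to_mlx helper accepts either a PyTensor dtype string or an object that is already an MLX dtype. A minimal usage sketch (the concrete values below are illustrative, not part of the commit):

import mlx.core as mx
from pytensor.link.mlx.dispatch.core import convert_dtype_to_mlx

# Strings are mapped to MLX dtype objects; MLX dtypes pass through unchanged.
assert convert_dtype_to_mlx("float32") == mx.float32
assert convert_dtype_to_mlx("bool") == mx.bool_
assert convert_dtype_to_mlx(mx.int64) == mx.int64
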
@@ -175,31 +218,36 @@ def scalar_from_tensor(x):
 def mlx_funcify_Tri(op, node, **kwargs):
     # node.inputs -> N, M, k
     const_args = [getattr(inp, "data", None) for inp in node.inputs]
+    dtype = convert_dtype_to_mlx(op.dtype)
 
     def tri(*args):
         # Replace args with compile-time constants when available
         args = [
             arg if const_a is None else const_a
             for arg, const_a in zip(args, const_args, strict=True)
         ]
-        return mx.tri(*args, dtype=op.dtype)  # MLX
+        return mx.tri(*args, dtype=dtype)  # MLX
 
     return tri
 
 
 @mlx_funcify.register(AllocEmpty)
 def mlx_funcify_AllocEmpty(op, **kwargs):
+    dtype = convert_dtype_to_mlx(op.dtype)
+
     def allocempty(*shape):
-        return mx.zeros(shape, dtype=op.dtype)
+        return mx.zeros(shape, dtype=dtype)
 
     return allocempty
 
 
 @mlx_funcify.register(Alloc)
 def mlx_funcify_Alloc(op, node, **kwargs):
     def alloc(x, *shape):
-        res = mx.broadcast_to(x, shape)
-        Alloc._check_runtime_broadcast(node, mx.array(x), res.shape)
+        # Convert x to an MLX array with the correct dtype if it's a scalar
+        x_array = mx.array(x)
+        res = mx.broadcast_to(x_array, shape)
+        Alloc._check_runtime_broadcast(node, x_array, res.shape)
         return res
 
     return alloc
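The Alloc change wraps the input in mx.array before broadcasting, so Python scalars reaching alloc() behave like MLX arrays. A minimal sketch of the pattern being relied on (the concrete values are illustrative, not from the commit):

import mlx.core as mx

x = 1.5                                  # Python scalar reaching alloc()
x_array = mx.array(x)                    # wrap it so downstream calls see an MLX array
res = mx.broadcast_to(x_array, (3, 4))   # broadcast to the requested shape
print(res.shape, res.dtype)              # (3, 4) mlx.core.float32
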
Lines changed: 19 additions & 51 deletions
@@ -1,65 +1,37 @@
 import mlx.core as mx
+import numpy as np
 
 from pytensor.link.mlx.dispatch.basic import mlx_funcify
+from pytensor.link.mlx.dispatch.core import convert_dtype_to_mlx
 from pytensor.scalar import Softplus
-from pytensor.tensor.elemwise import CAReduce, DimShuffle
-from pytensor.tensor.special import Softmax, SoftmaxGrad
-
 from pytensor.scalar.basic import (
     AND,
-    EQ,
-    GE,
-    GT,
-    LE,
-    LT,
-    NEQ,
     OR,
-    Abs,
     Add,
     Cast,
-    Cos,
-    Exp,
-    Log,
-    Log1p,
     Mul,
-    Neg,
-    Pow,
-    ScalarMaximum,
-    ScalarMinimum,
-    Sign,
-    Sin,
-    Sqr,
-    Sqrt,
-    Sub,
-    Switch,
-    TrueDiv,
 )
+from pytensor.tensor.elemwise import CAReduce, DimShuffle
+from pytensor.tensor.special import Softmax, SoftmaxGrad
+
 
 @mlx_funcify.register(DimShuffle)
 def mlx_funcify_DimShuffle(op, **kwargs):
     def dimshuffle(x):
+        # Convert scalar to array if needed
+        if isinstance(x, int | float) or (
+            isinstance(x, np.number) and not isinstance(x, np.ndarray)
+        ):
+            x = mx.array(x)
         res = mx.transpose(x, op.transposition)
-
         shape = list(res.shape[: len(op.shuffle)])
-
         for augm in op.augment:
             shape.insert(augm, 1)
-
         return mx.reshape(res, shape)
 
     return dimshuffle
 
 
-@mlx_funcify.register(DimShuffle)
-def mlx_funcify_DimShuffle(op, **kwargs):
-    def dimshuffle(x):
-        res = mx.transpose(x, op.transposition)
-        shape = list(res.shape[: len(op.shuffle)])
-        for augm in op.augment:
-            shape.insert(augm, 1)
-        return mx.reshape(res, shape)
-    return dimshuffle
-
 @mlx_funcify.register(CAReduce)
 def mlx_funcify_CAReduce(op, **kwargs):
     if isinstance(op.scalar_op, Add):
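The dimshuffle closure now promotes bare Python/NumPy scalars to MLX arrays before transposing. A rough sketch of the transpose/augment steps for one concrete case (the transposition, shuffle, and augment values below are made-up stand-ins for the op attributes):

import mlx.core as mx

x = mx.zeros((2, 3))
transposition = (1, 0)   # stand-in for op.transposition
shuffle = (1, 0)         # stand-in for op.shuffle
augment = [0]            # stand-in for op.augment: add a new leading broadcast axis

res = mx.transpose(x, transposition)      # shape (3, 2)
shape = list(res.shape[: len(shuffle)])   # [3, 2]
for augm in augment:
    shape.insert(augm, 1)                 # [1, 3, 2]
print(mx.reshape(res, shape).shape)       # (1, 3, 2)
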
@@ -86,23 +58,10 @@ def any(x):
             return mx.any(x, axis=op.axis)
 
         return any
-    elif isinstance(op.scalar_op, ScalarMaximum):
-
-        def max(x):
-            return x.max(axis=op.axis)
-
-        return max
-    elif isinstance(op.scalar_op, ScalarMinimum):
-
-        def min(x):
-            return x.min(axis=op.axis)
-
-        return min
     else:
         raise NotImplementedError(f"MLX does not support Elemwise {op.scalar_op}")
 
 
-
 @mlx_funcify.register(Softmax)
 def mlx_funcify_Softmax(op, **kwargs):
     axis = op.axis
@@ -142,3 +101,12 @@ def softplus(x):
         )
 
     return softplus
+
+
+@mlx_funcify.register(Cast)
+def mlx_funcify_Cast(op, **kwargs):
+    def cast(x):
+        dtype = convert_dtype_to_mlx(op.scalar_op.o_type.dtype)
+        return x.astype(dtype)
+
+    return cast
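The new Cast dispatch resolves the target dtype through convert_dtype_to_mlx and then relies on MLX's array.astype. A small sketch of just that conversion step (inputs are illustrative):

import mlx.core as mx

x = mx.array([1, 2, 3])    # int32 input
dtype = mx.float32         # e.g. what convert_dtype_to_mlx("float32") returns
y = x.astype(dtype)        # MLX arrays accept a Dtype object here
print(y.dtype)             # mlx.core.float32
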

pytensor/link/mlx/dispatch/math.py

Lines changed: 14 additions & 6 deletions
@@ -1,6 +1,7 @@
 import mlx.core as mx
 
-from pytensor.link.mlx.dispatch import mlx_funcify
+from pytensor.link.mlx.dispatch import mlx_funcify, mlx_typify
+from pytensor.link.mlx.dispatch.core import convert_dtype_to_mlx
 from pytensor.scalar import Softplus
 from pytensor.scalar.basic import (
     AND,
@@ -36,6 +37,12 @@
 from pytensor.tensor.math import Dot
 
 
+@mlx_typify.register(int)
+@mlx_typify.register(float)
+def mlx_typify_python_scalar(data, **kwargs):
+    return mx.array(data)
+
+
 @mlx_funcify.register(Dot)
 def mlx_funcify_Dot(op, **kwargs):
     def dot(x, y):
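Registering int and float with mlx_typify means Python scalars handed to a compiled MLX function are wrapped as MLX arrays on the way in. A hedged sketch of the dispatch pattern in isolation (the standalone singledispatch registry below is a stand-in for pytensor's actual mlx_typify, shown only to illustrate the mechanism):

from functools import singledispatch

import mlx.core as mx

@singledispatch
def mlx_typify(data, **kwargs):
    # Default: pass anything we don't recognise straight through.
    return data

@mlx_typify.register(int)
@mlx_typify.register(float)
def mlx_typify_python_scalar(data, **kwargs):
    return mx.array(data)

print(mlx_typify(3).dtype)    # Python int -> MLX array
print(mlx_typify(2.5).dtype)  # Python float -> MLX array
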
@@ -210,20 +217,21 @@ def any(x, y):
         return any
     elif isinstance(op.scalar_op, ScalarMaximum):
 
-        def max(x):
-            return x.max(axis=op.axis)
+        def max(x, y):
+            return mx.maximum(x, y)
 
         return max
     elif isinstance(op.scalar_op, ScalarMinimum):
 
-        def min(x):
-            return x.min(axis=op.axis)
+        def min(x, y):
+            return mx.minimum(x, y)
 
         return min
     elif isinstance(op.scalar_op, Cast):
 
         def cast(x):
-            return mx.cast(x, op.dtype)
+            dtype = convert_dtype_to_mlx(op.scalar_op.o_type.dtype)
+            return x.astype(dtype)
 
         return cast
     elif isinstance(op.scalar_op, Sign):
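The ScalarMaximum/ScalarMinimum branches previously closed over a single argument and reduced along op.axis, which is the CAReduce behaviour rather than the Elemwise one; the fix treats them as binary element-wise ops. A minimal sketch of the corrected semantics (values are illustrative):

import mlx.core as mx

a = mx.array([1.0, 5.0, 3.0])
b = mx.array([4.0, 2.0, 3.0])
print(mx.maximum(a, b))   # element-wise maximum: [4, 5, 3]
print(mx.minimum(a, b))   # element-wise minimum: [1, 2, 3]
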
