Speedup AdvancedSubtensor1 and AdvancedIncSubtensor1 in C backend #1346

Merged: 5 commits, Apr 29, 2025
pytensor/link/jax/dispatch/subtensor.py (3 additions, 0 deletions)
@@ -67,6 +67,9 @@
if len(indices) == 1:
indices = indices[0]

if isinstance(op, AdvancedIncSubtensor1):
op._check_runtime_broadcasting(node, x, y, indices)

[Codecov: added line 71 in pytensor/link/jax/dispatch/subtensor.py is not covered by tests.]

return jax_fn(x, indices, y)

return incsubtensor
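
Note: the new guard calls the op's `_check_runtime_broadcasting` before handing off to the JAX function. As a rough illustration only (the helper below is hypothetical, not PyTensor's actual implementation), the kind of check involved rejects a value whose runtime size is 1 along a dimension that would otherwise be broadcast silently against the indexed rows:

```python
import numpy as np

def check_runtime_broadcast_sketch(x, y, indices):
    # Hypothetical sketch: the target region has one row per index plus x's
    # trailing dimensions.  A y dimension that is 1 at runtime while the target
    # dimension is larger would be broadcast implicitly; the real check rejects
    # this unless the dimension is already declared broadcastable in y's static type.
    target_shape = (len(indices), *x.shape[1:])
    for y_dim, t_dim in zip(y.shape[::-1], target_shape[::-1]):
        if y_dim == 1 and t_dim != 1:
            raise ValueError("Runtime broadcasting not allowed; broadcast y explicitly.")

# check_runtime_broadcast_sketch(np.zeros((5, 3)), np.ones((1, 3)), [0, 2, 4])  # raises
```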
pytensor/link/numba/dispatch/slinalg.py (1 addition, 1 deletion)
@@ -83,7 +83,7 @@ def cholesky(a):
@numba_funcify.register(PivotToPermutations)
def pivot_to_permutation(op, node, **kwargs):
inverse = op.inverse
dtype = node.inputs[0].dtype
dtype = node.outputs[0].dtype
Member commented: What a dumb mistake, I wonder which idiot wrote this code

@numba_njit
def numba_pivot_to_permutation(piv):
pytensor/link/numba/dispatch/subtensor.py (4 additions, 4 deletions)
@@ -287,11 +287,11 @@ def numba_funcify_AdvancedIncSubtensor1(op, node, **kwargs):
inplace = op.inplace
set_instead_of_inc = op.set_instead_of_inc
x, vals, idxs = node.inputs
# TODO: Add explicit expand_dims in make_node so we don't need to worry about this here
broadcast = vals.type.ndim < x.type.ndim or vals.type.broadcastable[0]
broadcast_with_index = vals.type.ndim < x.type.ndim or vals.type.broadcastable[0]
# TODO: Add runtime_broadcast check

if set_instead_of_inc:
if broadcast:
if broadcast_with_index:

@numba_njit(boundscheck=True)
def advancedincsubtensor1_inplace(x, val, idxs):
@@ -318,7 +318,7 @@ def advancedincsubtensor1_inplace(x, vals, idxs):
x[idx] = val
return x
else:
if broadcast:
if broadcast_with_index:

@numba_njit(boundscheck=True)
def advancedincsubtensor1_inplace(x, val, idxs):
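
Note: the rename from `broadcast` to `broadcast_with_index` clarifies which case each generated kernel handles: either a single value is applied to every indexed row, or `vals` carries one row per index. Roughly, in NumPy terms (a sketch of the two cases, not the generated Numba code):

```python
import numpy as np

x = np.zeros((5, 3))
idxs = np.array([0, 2, 4])

# broadcast_with_index is True: one value (here a single row) is written to
# every index, e.g. when vals.ndim < x.ndim or its leading dim has length 1.
val = np.array([1.0, 2.0, 3.0])
for idx in idxs:
    x[idx] = val

# broadcast_with_index is False: vals provides one row per index.
vals = np.arange(9.0).reshape(3, 3)
for idx, v in zip(idxs, vals):
    x[idx] = v
```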
pytensor/link/pytorch/dispatch/subtensor.py (4 additions, 0 deletions)
@@ -109,6 +109,8 @@

def adv_set_subtensor(x, y, *indices):
check_negative_steps(indices)
if isinstance(op, AdvancedIncSubtensor1):
op._check_runtime_broadcasting(node, x, y, indices)

[Codecov: added line 113 in pytensor/link/pytorch/dispatch/subtensor.py is not covered by tests.]
if not inplace:
x = x.clone()
x[indices] = y.type_as(x)
@@ -120,6 +122,8 @@

def adv_inc_subtensor_no_duplicates(x, y, *indices):
check_negative_steps(indices)
if isinstance(op, AdvancedIncSubtensor1):
op._check_runtime_broadcasting(node, x, y, indices)

[Codecov: added line 126 in pytensor/link/pytorch/dispatch/subtensor.py is not covered by tests.]
if not inplace:
x = x.clone()
x[indices] += y.type_as(x)
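
Note: the same runtime-broadcasting guard is added on the PyTorch side. Without it, torch's advanced indexing silently broadcasts a value with a leading length-1 dimension across all indexed rows, which is exactly the behaviour the check is meant to reject when it was not declared statically (small demonstration, assuming standard torch semantics):

```python
import torch

x = torch.zeros(5, 3)
idx = torch.tensor([0, 2, 4])
y = torch.ones(1, 3)   # leading dim of 1 broadcasts across the 3 indexed rows
x[idx] += y            # succeeds silently in PyTorch; PyTensor wants this broadcast declared
```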
pytensor/tensor/basic.py (14 additions, 3 deletions)
@@ -1634,6 +1634,14 @@ def _check_runtime_broadcast(node, value, shape):
if v_static_dim is None and value_dim == 1 and out_dim != 1:
raise ValueError(Alloc._runtime_broadcast_error_msg)

@staticmethod
def value_is_scalar_zero(x: TensorVariable) -> bool:
return (
all(x.type.broadcastable)
and isinstance(x, Constant)
and (x.unique_value == 0)
)

def perform(self, node, inputs, out_):
(out,) = out_
v = inputs[0]
@@ -1659,6 +1667,7 @@ def c_code(self, node, name, inp, out, sub):
o_static_shape = node.outputs[0].type.shape
v_ndim = len(v_static_shape)
o_ndim = len(o_static_shape)
is_zero = self.value_is_scalar_zero(node.inputs[0])
assert o_ndim == len(inp[1:])

# Declare variables
@@ -1699,16 +1708,18 @@
{fail}
}}
}}

if ({int(is_zero)} && (PyArray_IS_C_CONTIGUOUS({zz}) || PyArray_IS_F_CONTIGUOUS({zz}))){{
PyArray_FILLWBYTE({zz}, 0);
}}
// This function takes care of broadcasting
if (PyArray_CopyInto({zz}, {vv}) == -1)
else if (PyArray_CopyInto({zz}, {vv}) == -1)
{fail}
"""

return code

def c_code_cache_version(self):
return (4,)
return (5,)

def infer_shape(self, fgraph, node, input_shapes):
return [node.inputs[1:]]
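
Note: the new fast path in `Alloc.c_code` recognizes a constant scalar zero and, when the output buffer is contiguous, zero-fills it in one pass with `PyArray_FILLWBYTE` instead of running a broadcasting copy. A rough NumPy-level analogue of the branch (simplified: the real `value_is_scalar_zero` inspects the symbolic `Constant`, not a runtime array):

```python
import numpy as np

def alloc_sketch(value, shape):
    out = np.empty(shape, dtype=value.dtype)
    value_is_scalar_zero = value.size == 1 and value.item() == 0
    if value_is_scalar_zero and (out.flags["C_CONTIGUOUS"] or out.flags["F_CONTIGUOUS"]):
        # Fast path: fill the whole buffer with zeros (PyArray_FILLWBYTE(zz, 0) in the C code).
        out.fill(0)
    else:
        # General path: broadcasting copy (PyArray_CopyInto(zz, vv) in the C code).
        np.copyto(out, value)
    return out
```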
pytensor/tensor/rewriting/subtensor.py (20 additions, 6 deletions)
@@ -1295,12 +1295,26 @@

@node_rewriter([AdvancedIncSubtensor1], inplace=True)
def local_inplace_AdvancedIncSubtensor1(fgraph, node):
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.clone_inplace()
new_node = new_op(*node.inputs)
copy_stack_trace(node.outputs, new_node)
return [new_node]
return False
if node.op.inplace:
return

x, y, idx = node.inputs
if fgraph.has_destroyers([x]):
# In this case we can't operate inplace, but if x is just an alloc of zeros
# We're better off duplicating it and then acting on it inplace.
if (
x.owner is not None
and isinstance(x.owner.op, Alloc)
and x.owner.op.value_is_scalar_zero(x.owner.inputs[0])
):
x = x.owner.clone().outputs[0]

[Codecov: added line 1310 in pytensor/tensor/rewriting/subtensor.py is not covered by tests.]
else:
return None # Inplace isn't valid

[Codecov: added line 1312 in pytensor/tensor/rewriting/subtensor.py is not covered by tests.]

new_op = node.op.clone_inplace()
new_node = new_op(x, y, idx)
copy_stack_trace(node.outputs, new_node)
return [new_node]
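
Note: the rewrite now also covers the case where `x` already has a destroyer. If `x` is just an `Alloc` of a scalar zero, duplicating the allocation is cheap, so the increment can still run destructively on its own buffer. A sketch of the kind of graph that benefits (assuming the usual ordering of the inplace rewrites):

```python
import pytensor
import pytensor.tensor as pt

idx = pt.ivector("idx")
y = pt.matrix("y")
x = pt.zeros((5, 3))    # Alloc of a constant scalar zero

# Both increments consume the same zeros buffer, so only one of them could
# normally be made inplace; the other would have to copy x first.  Because x
# is an Alloc of zeros, the rewrite clones the allocation instead, and each
# AdvancedIncSubtensor1 can then work destructively on its own buffer.
out1 = pt.inc_subtensor(x[idx], y)
out2 = pt.inc_subtensor(x[idx], 2 * y)

fn = pytensor.function([idx, y], [out1, out2])
```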


compile.optdb.register(
pytensor/tensor/slinalg.py (2 additions, 2 deletions)
@@ -604,7 +604,7 @@ def make_node(self, pivots):

def perform(self, node, inputs, outputs):
[pivots] = inputs
p_inv = np.arange(len(pivots), dtype=pivots.dtype)
p_inv = np.arange(len(pivots), dtype="int64")
Member commented: We always have to be careful about single vs double precision for floats (use floatX), but not for ints -- why is that?

Member (author) replied: float32 was very much geared towards GPU concerns; integers are not as problematic [citation needed].
for i in range(len(pivots)):
p_inv[i], p_inv[pivots[i]] = p_inv[pivots[i]], p_inv[i]
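
Note: for reference, the `perform` loop above replays LAPACK-style pivot swaps ("row i was swapped with row `pivots[i]`") onto an `arange` to obtain the permutation, e.g.:

```python
import numpy as np

pivots = np.array([2, 2, 2], dtype="int32")   # e.g. pivots from an LU factorization of a 3x3 matrix
p_inv = np.arange(len(pivots), dtype="int64")
for i in range(len(pivots)):
    p_inv[i], p_inv[pivots[i]] = p_inv[pivots[i]], p_inv[i]
print(p_inv)   # [2 0 1]
```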
@@ -639,7 +639,7 @@ def make_node(self, A):
)

LU = matrix(shape=A.type.shape, dtype=A.type.dtype)
pivots = vector(shape=(A.type.shape[0],), dtype="int64")
pivots = vector(shape=(A.type.shape[0],), dtype="int32")
Member commented: Then why int64 above but int32 here?

Member (author) replied: This is what the scipy function returns. I went with int64 above because np.argsort returns int64 regardless of the input dtype, so the outputs keep the same type regardless of return_inverse (or whatever that property on the other Op is called).
return Apply(self, [A], [LU, pivots])
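
Note: as the author says, the int32 here mirrors what scipy returns for the pivot vector. A quick check (assuming a standard scipy/LAPACK build):

```python
import numpy as np
from scipy.linalg import lu_factor

A = np.random.default_rng(0).normal(size=(4, 4))
LU, piv = lu_factor(A)
print(piv.dtype)   # typically int32, matching the dtype declared for the pivots output
```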
