|
1 | 1 | import warnings
|
2 | 2 | from functools import partial
|
| 3 | +from typing import Callable, Literal, Optional, Union |
3 | 4 |
|
4 | 5 | import numpy as np
|
| 6 | +from numpy.core.numeric import normalize_axis_tuple # type: ignore |
5 | 7 |
|
6 | 8 | from pytensor import scalar as ps
|
7 | 9 | from pytensor.gradient import DisconnectedType
|
@@ -688,41 +690,204 @@ def matrix_power(M, n):
|
688 | 690 | return result
|
689 | 691 |
|
690 | 692 |
|
def _multi_svd_norm(
    x: ptb.TensorVariable, row_axis: int, col_axis: int, reduce_op: Callable
):
    """Compute a function of the singular values of the 2-D matrices in `x`.

    This is a private utility function used by `pytensor.tensor.nlinalg.norm()`.

    Copied from `np.linalg._multi_svd_norm`.

    Parameters
    ----------
    x : TensorVariable
        Input tensor.
    row_axis, col_axis : int
        The axes of `x` that hold the 2-D matrices.
    reduce_op : callable
        Reduction op. Should be one of `pt.min`, `pt.max`, or `pt.sum`

    Returns
    -------
    result : float or ndarray
        If `x` is 2-D, the return values is a float.
        Otherwise, it is an array with ``x.ndim - 2`` dimensions.
        The return values are either the minimum or maximum or sum of the
        singular values of the matrices, depending on whether `op`
        is `pt.amin` or `pt.amax` or `pt.sum`.
    """
    # Bring the two matrix axes to the last two positions so the (batched)
    # SVD treats them as the core matrix dimensions.
    matrices = ptb.moveaxis(x, (row_axis, col_axis), (-2, -1))
    # Reduce over the singular-value axis produced by the SVD.
    return reduce_op(svd(matrices, compute_uv=False), axis=-1)


# Norm orders accepted by `norm` (arbitrary float p-norms for vectors are
# also allowed and handled separately).
VALID_ORD = Literal["fro", "f", "nuc", "inf", "-inf", 0, 1, -1, 2, -2]
def norm(
    x: ptb.TensorVariable,
    ord: Optional[Union[float, VALID_ORD]] = None,
    axis: Optional[Union[int, tuple[int, ...]]] = None,
    keepdims: bool = False,
):
    """
    Matrix or vector norm.

    Parameters
    ----------
    x: TensorVariable
        Tensor to take norm of.

    ord: float, str or int, optional
        Order of norm. If `ord` is a str, it must be one of the following:
            - 'fro' or 'f' : Frobenius norm
            - 'nuc' : nuclear norm
            - 'inf' : Infinity norm
            - '-inf' : Negative infinity norm
        If an integer, order can be one of -2, -1, 0, 1, or 2.
        Otherwise `ord` must be a float.

        Default is the Frobenius (L2) norm.

    axis: tuple of int, optional
        Axes over which to compute the norm. If None, norm of entire matrix (or vector) is computed. Row or column
        norms can be computed by passing a single integer; this will treat a matrix like a batch of vectors.

    keepdims: bool
        If True, dummy axes will be inserted into the output so that norm.ndim == x.ndim. Default is False.

    Returns
    -------
    TensorVariable
        Norm of `x` along axes specified by `axis`.

    Raises
    ------
    TypeError
        If `axis` is neither None, an integer, nor a tuple of integers.
    ValueError
        If a string `ord` is invalid for vector norms, if `ord` is invalid
        for matrix norms, or if `axis` has more than two entries.

    Notes
    -----
    Batched dimensions are supported to the left of the core dimensions. For example, if `x` is a 3D tensor with
    shape (2, 3, 4), then `norm(x)` will compute the norm of each 3x4 matrix in the batch.

    If the input is a 2D tensor and should be treated as a batch of vectors, the `axis` argument must be specified.
    """
    x = ptb.as_tensor_variable(x)

    ndim = x.ndim
    core_ndim = min(2, ndim)
    batch_ndim = ndim - core_ndim

    if axis is None:
        # Handle some common cases first. These can be computed more quickly than the default SVD way, so we always
        # want to check for them.
        if (
            (ord is None)
            or (ord in ("f", "fro") and core_ndim == 2)
            or (ord == 2 and core_ndim == 1)
        ):
            # Flatten the core dims so the L2/Frobenius norm reduces to a
            # (batched) inner product of x with itself.
            x = x.reshape(tuple(x.shape[:-2]) + (-1,) + (1,) * (core_ndim - 1))
            batch_T_dim_order = tuple(range(batch_ndim)) + tuple(
                range(batch_ndim + core_ndim - 1, batch_ndim - 1, -1)
            )

            if x.dtype.startswith("complex"):
                x_real = x.real  # type: ignore
                x_imag = x.imag  # type: ignore
                sqnorm = (
                    ptb.transpose(x_real, batch_T_dim_order) @ x_real
                    + ptb.transpose(x_imag, batch_T_dim_order) @ x_imag
                )
            else:
                sqnorm = ptb.transpose(x, batch_T_dim_order) @ x

            ret = ptm.sqrt(sqnorm)
            if core_ndim == 2:
                # BUGFIX: index away only the two size-1 core axes. A bare
                # `.squeeze()` would also drop unit-sized *batch* dimensions
                # (e.g. an input of shape (1, 3, 4)), corrupting the result
                # rank and the `keepdims` re-padding below.
                ret = ret[..., 0, 0]
            # When core_ndim == 1 the inner product is already a scalar.
            if keepdims:
                ret = ptb.shape_padright(ret, core_ndim)
            return ret

        # No special computation to exploit -- set default axis before continuing
        axis = tuple(range(core_ndim))

    elif not isinstance(axis, tuple):
        try:
            axis = int(axis)
        except Exception as e:
            raise TypeError(
                "'axis' must be None, an integer, or a tuple of integers"
            ) from e

        axis = (axis,)

    if len(axis) == 1:
        # Vector norms
        if ord in [None, "fro", "f"] and (core_ndim == 2):
            # This is here to catch the case where X is a 2D tensor but the user wants to treat it as a batch of
            # vectors. Other vector norms will work fine in this case.
            ret = ptm.sqrt(ptm.sum((x.conj() * x).real, axis=axis, keepdims=keepdims))
        elif (ord == "inf") or (ord == np.inf):
            ret = ptm.max(ptm.abs(x), axis=axis, keepdims=keepdims)
        elif (ord == "-inf") or (ord == -np.inf):
            ret = ptm.min(ptm.abs(x), axis=axis, keepdims=keepdims)
        elif ord == 0:
            # Zero "norm": count of nonzero entries.
            ret = ptm.neq(x, 0).sum(axis=axis, keepdims=keepdims)
        elif ord == 1:
            ret = ptm.sum(ptm.abs(x), axis=axis, keepdims=keepdims)
        elif isinstance(ord, str):
            raise ValueError(f"Invalid norm order '{ord}' for vectors")
        else:
            # General p-norm: (sum |x| ** p) ** (1 / p)
            ret = ptm.sum(ptm.abs(x) ** ord, axis=axis, keepdims=keepdims)
            ret **= ptm.reciprocal(ord)

        return ret

    elif len(axis) == 2:
        # Matrix norms. Normalize the requested core axes, then shift them
        # past the batch dimensions so they index into the full tensor.
        row_axis, col_axis = (
            batch_ndim + a for a in normalize_axis_tuple(axis, core_ndim)
        )
        axis = (row_axis, col_axis)

        if ord in [None, "fro", "f"]:
            ret = ptm.sqrt(ptm.sum((x.conj() * x).real, axis=axis))

        elif (ord == "inf") or (ord == np.inf):
            # Max row sum. Reducing over col_axis first may shift row_axis left.
            if row_axis > col_axis:
                row_axis -= 1
            ret = ptm.max(ptm.sum(ptm.abs(x), axis=col_axis), axis=row_axis)

        elif (ord == "-inf") or (ord == -np.inf):
            # Min row sum.
            if row_axis > col_axis:
                row_axis -= 1
            ret = ptm.min(ptm.sum(ptm.abs(x), axis=col_axis), axis=row_axis)

        elif ord == 1:
            # Max column sum. Reducing over row_axis first may shift col_axis left.
            if col_axis > row_axis:
                col_axis -= 1
            ret = ptm.max(ptm.sum(ptm.abs(x), axis=row_axis), axis=col_axis)

        elif ord == -1:
            # Min column sum.
            if col_axis > row_axis:
                col_axis -= 1
            ret = ptm.min(ptm.sum(ptm.abs(x), axis=row_axis), axis=col_axis)

        elif ord == 2:
            # Largest singular value.
            ret = _multi_svd_norm(x, row_axis, col_axis, ptm.max)

        elif ord == -2:
            # Smallest singular value.
            ret = _multi_svd_norm(x, row_axis, col_axis, ptm.min)

        elif ord == "nuc":
            # Nuclear norm: sum of singular values.
            ret = _multi_svd_norm(x, row_axis, col_axis, ptm.sum)

        else:
            raise ValueError(f"Invalid norm order for matrices: {ord}")

        if keepdims:
            # `axis` still holds the original (batch-adjusted) reduced axes.
            ret = ptb.expand_dims(ret, axis)

        return ret
    else:
        # BUGFIX: the failing condition is the number of entries in `axis`,
        # not `core_ndim` (which is always 0-2) -- report it accurately.
        raise ValueError(
            f"Invalid number of axes for norm: expected 1 or 2, got {len(axis)}"
        )
726 | 891 |
|
727 | 892 |
|
728 | 893 | class TensorInv(Op):
|
|
0 commit comments