From c3b666e611f840fc335615a561239cce097f9898 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sat, 24 Dec 2022 10:10:25 +0100 Subject: [PATCH] Rename aesara to pytensor. --- examples/case_studies/GEV.ipynb | 8 +- examples/case_studies/GEV.myst.md | 4 +- examples/case_studies/LKJ.ipynb | 4 +- examples/case_studies/LKJ.myst.md | 2 +- .../bayesian_ab_testing_introduction.ipynb | 4 +- .../bayesian_ab_testing_introduction.myst.md | 2 +- examples/case_studies/binning.ipynb | 6 +- examples/case_studies/binning.myst.md | 4 +- .../blackbox_external_likelihood_numpy.ipynb | 110 +++++++++--------- ...blackbox_external_likelihood_numpy.myst.md | 36 +++--- examples/case_studies/factor_analysis.ipynb | 24 ++-- examples/case_studies/factor_analysis.myst.md | 10 +- .../hierarchical_partial_pooling.ipynb | 4 +- .../hierarchical_partial_pooling.myst.md | 2 +- examples/case_studies/item_response_nba.ipynb | 4 +- .../case_studies/item_response_nba.myst.md | 2 +- .../case_studies/mediation_analysis.ipynb | 4 +- .../case_studies/mediation_analysis.myst.md | 2 +- .../case_studies/moderation_analysis.ipynb | 4 +- .../case_studies/moderation_analysis.myst.md | 2 +- .../case_studies/multilevel_modeling.ipynb | 4 +- .../case_studies/multilevel_modeling.myst.md | 2 +- .../probabilistic_matrix_factorization.ipynb | 14 +-- ...probabilistic_matrix_factorization.myst.md | 6 +- examples/case_studies/putting_workflow.ipynb | 20 ++-- .../case_studies/putting_workflow.myst.md | 2 +- .../case_studies/reinforcement_learning.ipynb | 40 +++---- .../reinforcement_learning.myst.md | 38 +++--- examples/case_studies/rugby_analytics.ipynb | 14 +-- examples/case_studies/rugby_analytics.myst.md | 4 +- examples/case_studies/spline.ipynb | 4 +- examples/case_studies/spline.myst.md | 2 +- .../case_studies/stochastic_volatility.ipynb | 4 +- .../stochastic_volatility.myst.md | 2 +- .../case_studies/wrapping_jax_function.ipynb | 104 ++++++++--------- .../wrapping_jax_function.myst.md | 90 +++++++------- .../difference_in_differences.ipynb | 4 +- .../difference_in_differences.myst.md | 2 +- examples/causal_inference/excess_deaths.ipynb | 8 +- .../causal_inference/excess_deaths.myst.md | 4 +- .../interrupted_time_series.ipynb | 4 +- .../interrupted_time_series.myst.md | 2 +- .../regression_discontinuity.ipynb | 8 +- .../regression_discontinuity.myst.md | 2 +- .../sampler-stats.ipynb | 2 +- examples/gaussian_processes/GP-Kron.ipynb | 4 +- examples/gaussian_processes/GP-Kron.myst.md | 2 +- examples/gaussian_processes/GP-Latent.ipynb | 4 +- examples/gaussian_processes/GP-Latent.myst.md | 2 +- .../gaussian_processes/GP-MeansAndCovs.ipynb | 12 +- .../GP-MeansAndCovs.myst.md | 10 +- .../MOGP-Coregion-Hadamard.ipynb | 8 +- .../MOGP-Coregion-Hadamard.myst.md | 4 +- .../gaussian_processes/gaussian_process.ipynb | 8 +- .../gaussian_process.myst.md | 4 +- .../GLM-binomial-regression.ipynb | 4 +- .../GLM-binomial-regression.myst.md | 2 +- .../GLM-hierarchical-binomial-model.ipynb | 4 +- .../GLM-hierarchical-binomial-model.myst.md | 2 +- .../GLM-negative-binomial-regression.ipynb | 4 +- .../GLM-negative-binomial-regression.myst.md | 2 +- .../GLM-poisson-regression.ipynb | 4 +- .../GLM-poisson-regression.myst.md | 2 +- .../GLM-robust.ipynb | 6 +- .../GLM-robust.myst.md | 4 +- .../GLM-rolling-regression.ipynb | 4 +- .../GLM-rolling-regression.myst.md | 2 +- .../GLM-simpsons-paradox.ipynb | 16 +-- .../GLM-simpsons-paradox.myst.md | 2 +- .../GLM-truncated-censored-regression.ipynb | 4 +- .../GLM-truncated-censored-regression.myst.md | 2 +- 
examples/howto/api_quickstart.ipynb | 16 +-- examples/howto/api_quickstart.myst.md | 10 +- examples/howto/custom_distribution.ipynb | 2 +- examples/howto/custom_distribution.myst.md | 2 +- examples/howto/howto_debugging.ipynb | 20 ++-- examples/howto/howto_debugging.myst.md | 16 +-- examples/howto/lasso_block_update.ipynb | 4 +- examples/howto/lasso_block_update.myst.md | 2 +- examples/howto/sampling_compound_step.ipynb | 6 +- examples/howto/sampling_compound_step.myst.md | 4 +- .../gaussian_mixture_model.ipynb | 8 +- .../gaussian_mixture_model.myst.md | 2 +- examples/samplers/MLDA_introduction.ipynb | 2 +- examples/samplers/MLDA_introduction.myst.md | 2 +- examples/samplers/SMC2_gaussians.ipynb | 4 +- examples/samplers/SMC2_gaussians.myst.md | 2 +- .../survival_analysis/censored_data.ipynb | 20 ++-- .../survival_analysis/censored_data.myst.md | 2 +- ...ngers-Prophet_with_Bayesian_workflow.ipynb | 4 +- ...ers-Prophet_with_Bayesian_workflow.myst.md | 2 +- ...recasting_with_structural_timeseries.ipynb | 4 +- ...casting_with_structural_timeseries.myst.md | 2 +- examples/time_series/bayesian_var_model.ipynb | 20 ++-- .../time_series/bayesian_var_model.myst.md | 2 +- .../bayesian_neural_network_advi.ipynb | 8 +- .../bayesian_neural_network_advi.myst.md | 6 +- .../variational_inference/pathfinder.ipynb | 6 +- .../variational_inference/pathfinder.myst.md | 4 +- 99 files changed, 464 insertions(+), 464 deletions(-) diff --git a/examples/case_studies/GEV.ipynb b/examples/case_studies/GEV.ipynb index d352ca44d..d84910559 100644 --- a/examples/case_studies/GEV.ipynb +++ b/examples/case_studies/GEV.ipynb @@ -45,12 +45,12 @@ "metadata": {}, "outputs": [], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", "import pymc_experimental.distributions as pmx\n", + "import pytensor.tensor as at\n", "\n", "from arviz.plots import plot_utils as azpu" ] @@ -984,7 +984,7 @@ "Python version : 3.10.6\n", "IPython version : 8.5.0\n", "\n", - "aesara: 2.8.6\n", + "pytensor: 2.8.6\n", "arviz : 0.12.1\n", "\n", "pymc_experimental: 0.0.1\n", @@ -993,7 +993,7 @@ "arviz : 0.12.1\n", "numpy : 1.23.3\n", "json : 2.0.9\n", - "aesara : 2.8.6\n", + "pytensor : 2.8.6\n", "pymc : 3.9.3+1493.g372d7c24\n", "\n", "Watermark: 2.3.1\n", @@ -1003,7 +1003,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,arviz" + "%watermark -n -u -v -iv -w -p pytensor,arviz" ] }, { diff --git a/examples/case_studies/GEV.myst.md b/examples/case_studies/GEV.myst.md index 04f6cf135..fd5b3c32f 100644 --- a/examples/case_studies/GEV.myst.md +++ b/examples/case_studies/GEV.myst.md @@ -40,12 +40,12 @@ Note that this parametrization of the shape parameter $\xi$ is opposite in sign We will use the example of the Port Pirie annual maximum sea-level data used in {cite:t}`coles2001gev`, and compare with the frequentist results presented there. 
```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pymc as pm import pymc_experimental.distributions as pmx +import pytensor.tensor as at from arviz.plots import plot_utils as azpu ``` @@ -232,7 +232,7 @@ az.plot_pair(idata, var_names=["μ", "σ", "ξ"], kind="kde", marginals=True, di ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,arviz +%watermark -n -u -v -iv -w -p pytensor,arviz ``` ```{code-cell} ipython3 diff --git a/examples/case_studies/LKJ.ipynb b/examples/case_studies/LKJ.ipynb index fb3058dfb..1dd81dca1 100644 --- a/examples/case_studies/LKJ.ipynb +++ b/examples/case_studies/LKJ.ipynb @@ -987,7 +987,7 @@ "Python version : 3.7.12\n", "IPython version : 5.5.0\n", "\n", - "aesara: 2.4.0\n", + "pytensor: 2.4.0\n", "xarray: 0.18.2\n", "\n", "pymc : 4.0.0b2\n", @@ -1004,7 +1004,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,xarray" + "%watermark -n -u -v -iv -w -p pytensor,xarray" ] } ], diff --git a/examples/case_studies/LKJ.myst.md b/examples/case_studies/LKJ.myst.md index a4ee601f5..eadddaab6 100644 --- a/examples/case_studies/LKJ.myst.md +++ b/examples/case_studies/LKJ.myst.md @@ -326,5 +326,5 @@ id: kJCfuzGtr2Pq outputId: da547b05-d812-4959-aff6-cf4a12faca15 --- %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,xarray +%watermark -n -u -v -iv -w -p pytensor,xarray ``` diff --git a/examples/case_studies/bayesian_ab_testing_introduction.ipynb b/examples/case_studies/bayesian_ab_testing_introduction.ipynb index c7d997b3e..4d9429a08 100644 --- a/examples/case_studies/bayesian_ab_testing_introduction.ipynb +++ b/examples/case_studies/bayesian_ab_testing_introduction.ipynb @@ -2163,7 +2163,7 @@ "Python version : 3.8.10\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.7.3\n", + "pytensor: 2.7.3\n", "xarray: 2022.3.0\n", "\n", "matplotlib: 3.5.2\n", @@ -2181,7 +2181,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,xarray" + "%watermark -n -u -v -iv -w -p pytensor,xarray" ] }, { diff --git a/examples/case_studies/bayesian_ab_testing_introduction.myst.md b/examples/case_studies/bayesian_ab_testing_introduction.myst.md index d76b56cc5..33b258a00 100644 --- a/examples/case_studies/bayesian_ab_testing_introduction.myst.md +++ b/examples/case_studies/bayesian_ab_testing_introduction.myst.md @@ -687,7 +687,7 @@ We also plan to create more PyMC tutorials on these topics, so stay tuned! 
```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,xarray +%watermark -n -u -v -iv -w -p pytensor,xarray ``` :::{include} ../page_footer.md diff --git a/examples/case_studies/binning.ipynb b/examples/case_studies/binning.ipynb index 49ddbf92c..954897f0f 100644 --- a/examples/case_studies/binning.ipynb +++ b/examples/case_studies/binning.ipynb @@ -72,7 +72,7 @@ "We are now in a position to sketch out a generative PyMC model:\n", "\n", "```python\n", - "import aesara.tensor as at\n", + "import pytensor.tensor as at\n", "\n", "with pm.Model() as model:\n", " # priors\n", @@ -120,12 +120,12 @@ "source": [ "import warnings\n", "\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import seaborn as sns\n", "\n", "warnings.filterwarnings(action=\"ignore\", category=UserWarning)" @@ -3538,7 +3538,7 @@ "pymc : 4.0.0b6\n", "arviz : 0.12.1\n", "numpy : 1.22.4\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "\n", "Watermark: 2.3.1\n", "\n" diff --git a/examples/case_studies/binning.myst.md b/examples/case_studies/binning.myst.md index 6f3b4cbf1..1fc0fac22 100644 --- a/examples/case_studies/binning.myst.md +++ b/examples/case_studies/binning.myst.md @@ -69,7 +69,7 @@ In ordinal regression, the cutpoints are treated as latent variables and the par We are now in a position to sketch out a generative PyMC model: ```python -import aesara.tensor as at +import pytensor.tensor as at with pm.Model() as model: # priors @@ -110,12 +110,12 @@ The approach was illustrated with a Gaussian distribution, and below we show a n import warnings -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at import seaborn as sns warnings.filterwarnings(action="ignore", category=UserWarning) diff --git a/examples/case_studies/blackbox_external_likelihood_numpy.ipynb b/examples/case_studies/blackbox_external_likelihood_numpy.ipynb index 88936d160..8666a2b6b 100644 --- a/examples/case_studies/blackbox_external_likelihood_numpy.ipynb +++ b/examples/case_studies/blackbox_external_likelihood_numpy.ipynb @@ -40,14 +40,14 @@ } ], "source": [ - "import aesara\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import IPython\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "\n", "print(f\"Running on PyMC v{pm.__version__}\")" ] @@ -91,9 +91,9 @@ " m = my_external_func(a, b) # <--- this is not going to work!\n", "```\n", "\n", - "Another issue is that if you want to be able to use the gradient-based step samplers like {class}`pymc.NUTS` and {class}`Hamiltonian Monte Carlo (HMC) `, then your model/likelihood needs a gradient to be defined. If you have a model that is defined as a set of Aesara operators then this is no problem - internally it will be able to do automatic differentiation - but if your model is essentially a \"black box\" then you won't necessarily know what the gradients are.\n", + "Another issue is that if you want to be able to use the gradient-based step samplers like {class}`pymc.NUTS` and {class}`Hamiltonian Monte Carlo (HMC) `, then your model/likelihood needs a gradient to be defined. 
If you have a model that is defined as a set of PyTensor operators then this is no problem - internally it will be able to do automatic differentiation - but if your model is essentially a \"black box\" then you won't necessarily know what the gradients are.\n", "\n", - "Defining a model/likelihood that PyMC can use and that calls your \"black box\" function is possible, but it relies on creating a [custom Aesara Op](https://docs.pymc.io/advanced_aesara.html#writing-custom-aesara-ops). This is, hopefully, a clear description of how to do this, including one way of writing a gradient function that could be generally applicable.\n", + "Defining a model/likelihood that PyMC can use and that calls your \"black box\" function is possible, but it relies on creating a [custom PyTensor Op](https://docs.pymc.io/advanced_pytensor.html#writing-custom-pytensor-ops). This is, hopefully, a clear description of how to do this, including one way of writing a gradient function that could be generally applicable.\n", "\n", "In the examples below, we create a very simple model and log-likelihood function in numpy." ] @@ -154,16 +154,16 @@ "ValueError: setting an array element with a sequence.\n", "```\n", "\n", - "This is because `m` and `c` are Aesara tensor-type objects.\n", + "This is because `m` and `c` are PyTensor tensor-type objects.\n", "\n", - "So, what we actually need to do is create a [Aesara Op](http://deeplearning.net/software/aesara/extending/extending_aesara.html). This will be a new class that wraps our log-likelihood function (or just our model function, if that is all that is required) into something that can take in Aesara tensor objects, but internally can cast them as floating point values that can be passed to our log-likelihood function. We will do this below, initially without defining a [grad() method](http://deeplearning.net/software/aesara/extending/op.html#grad) for the Op." + "So, what we actually need to do is create a [PyTensor Op](http://deeplearning.net/software/pytensor/extending/extending_pytensor.html). This will be a new class that wraps our log-likelihood function (or just our model function, if that is all that is required) into something that can take in PyTensor tensor objects, but internally can cast them as floating point values that can be passed to our log-likelihood function. We will do this below, initially without defining a [grad() method](http://deeplearning.net/software/pytensor/extending/op.html#grad) for the Op." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Aesara Op without grad" + "## PyTensor Op without grad" ] }, { @@ -179,7 +179,7 @@ }, "outputs": [], "source": [ - "# define a aesara Op for our likelihood function\n", + "# define a pytensor Op for our likelihood function\n", "class LogLike(at.Op):\n", "\n", " \"\"\"\n", @@ -352,9 +352,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Aesara Op with grad\n", + "## PyTensor Op with grad\n", "\n", - "What if we wanted to use NUTS or HMC? If we knew the analytical derivatives of the model/likelihood function then we could add a [grad() method](http://deeplearning.net/software/aesara/extending/op.html#grad) to the Op using that analytical form.\n", + "What if we wanted to use NUTS or HMC? If we knew the analytical derivatives of the model/likelihood function then we could add a [grad() method](http://deeplearning.net/software/pytensor/extending/op.html#grad) to the Op using that analytical form.\n", "\n", "But, what if we don't know the analytical form. 
If our model/likelihood is purely Python and made up of standard maths operators and Numpy functions, then the [autograd](https://github.com/HIPS/autograd) module could potentially be used to find gradients (also, see [here](https://github.com/ActiveState/code/blob/master/recipes/Python/580610_Auto_differentiation/recipe-580610.py) for a nice Python example of automatic differentiation). But, if our model/likelihood truly is a \"black box\" then we can just use the good-old-fashioned [finite difference](https://en.wikipedia.org/wiki/Finite_difference) to find the gradients - this can be slow, especially if there are a large number of variables, or the model takes a long time to evaluate. Below, a function to find gradients has been defined that uses the finite difference (the central difference) - it uses an iterative method with successively smaller interval sizes to check that the gradient converges. But, you could do something far simpler and just use, for example, the SciPy [approx_fprime](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.approx_fprime.html) function.\n", "\n", @@ -409,7 +409,7 @@ "source": [ "So, now we can just redefine our Op with a `grad()` method, right?\n", "\n", - "It's not quite so simple! The `grad()` method itself requires that its inputs are Aesara tensor variables, whereas our `gradients` function above, like our `my_loglike` function, wants a list of floating point values. So, we need to define another Op that calculates the gradients. Below, I define a new version of the `LogLike` Op, called `LogLikeWithGrad` this time, that has a `grad()` method. This is followed by anothor Op called `LogLikeGrad` that, when called with a vector of Aesara tensor variables, returns another vector of values that are the gradients (i.e., the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)) of our log-likelihood function at those values. Note that the `grad()` method itself does not return the gradients directly, but instead returns the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)-vector product (you can hopefully just copy what I've done and not worry about what this means too much!)." + "It's not quite so simple! The `grad()` method itself requires that its inputs are PyTensor tensor variables, whereas our `gradients` function above, like our `my_loglike` function, wants a list of floating point values. So, we need to define another Op that calculates the gradients. Below, I define a new version of the `LogLike` Op, called `LogLikeWithGrad` this time, that has a `grad()` method. This is followed by anothor Op called `LogLikeGrad` that, when called with a vector of PyTensor tensor variables, returns another vector of values that are the gradients (i.e., the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)) of our log-likelihood function at those values. Note that the `grad()` method itself does not return the gradients directly, but instead returns the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)-vector product (you can hopefully just copy what I've done and not worry about what this means too much!)." 
] }, { @@ -425,7 +425,7 @@ }, "outputs": [], "source": [ - "# define a aesara Op for our likelihood function\n", + "# define a pytensor Op for our likelihood function\n", "class LogLikeWithGrad(at.Op):\n", "\n", " itypes = [at.dvector] # expects a vector of parameter values when called\n", @@ -819,7 +819,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can now check that the gradient Op works as expected. First, just create and call the `LogLikeGrad` class, which should return the gradient directly (note that we have to create a [Aesara function](http://deeplearning.net/software/aesara/library/compile/function.html) to convert the output of the Op to an array). Secondly, we call the gradient from `LogLikeWithGrad` by using the [Aesara tensor gradient](http://deeplearning.net/software/aesara/library/gradient.html#aesara.gradient.grad) function. Finally, we will check the gradient returned by the PyMC model for a Normal distribution, which should be the same as the log-likelihood function we defined. In all cases we evaluate the gradients at the true values of the model function (the straight line) that was created." + "We can now check that the gradient Op works as expected. First, just create and call the `LogLikeGrad` class, which should return the gradient directly (note that we have to create a [PyTensor function](http://deeplearning.net/software/pytensor/library/compile/function.html) to convert the output of the Op to an array). Secondly, we call the gradient from `LogLikeWithGrad` by using the [PyTensor tensor gradient](http://deeplearning.net/software/pytensor/library/gradient.html#pytensor.gradient.grad) function. Finally, we will check the gradient returned by the PyMC model for a Normal distribution, which should be the same as the log-likelihood function we defined. In all cases we evaluate the gradients at the true values of the model function (the straight line) that was created." 
] }, { @@ -838,41 +838,41 @@ "name": "stderr", "output_type": "stream", "text": [ - "ERROR (aesara.graph.opt): Optimization failure due to: transform_values\n", - "ERROR (aesara.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", - "ERROR (aesara.graph.opt): TRACEBACK:\n", - "ERROR (aesara.graph.opt): Traceback (most recent call last):\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1992, in process_node\n", + "ERROR (pytensor.graph.opt): Optimization failure due to: transform_values\n", + "ERROR (pytensor.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", + "ERROR (pytensor.graph.opt): TRACEBACK:\n", + "ERROR (pytensor.graph.opt): Traceback (most recent call last):\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1992, in process_node\n", " replacements = lopt.transform(fgraph, node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1203, in transform\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1203, in transform\n", " return self.fn(fgraph, node)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 148, in transform_values\n", " new_value_var = transformed_variable(\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 286, in __call__\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 286, in __call__\n", " compute_test_value(node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 128, in compute_test_value\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 128, in compute_test_value\n", " required = thunk()\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 508, in rval\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 508, in rval\n", " r = p(n, [x[0] for x in i], o)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 48, in perform\n", " raise NotImplementedError(\n", "NotImplementedError: These `Op`s should be removed from graphs used for computation.\n", "\n", - "ERROR (aesara.graph.opt): Optimization failure due to: transform_values\n", - "ERROR (aesara.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", - "ERROR (aesara.graph.opt): TRACEBACK:\n", - "ERROR (aesara.graph.opt): Traceback (most recent call last):\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1992, in process_node\n", + "ERROR (pytensor.graph.opt): Optimization failure due to: transform_values\n", + "ERROR (pytensor.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", + "ERROR (pytensor.graph.opt): 
TRACEBACK:\n", + "ERROR (pytensor.graph.opt): Traceback (most recent call last):\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1992, in process_node\n", " replacements = lopt.transform(fgraph, node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1203, in transform\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1203, in transform\n", " return self.fn(fgraph, node)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 148, in transform_values\n", " new_value_var = transformed_variable(\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 286, in __call__\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 286, in __call__\n", " compute_test_value(node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 128, in compute_test_value\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 128, in compute_test_value\n", " required = thunk()\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 508, in rval\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 508, in rval\n", " r = p(n, [x[0] for x in i], o)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 48, in perform\n", " raise NotImplementedError(\n", @@ -892,41 +892,41 @@ "name": "stderr", "output_type": "stream", "text": [ - "ERROR (aesara.graph.opt): Optimization failure due to: transform_values\n", - "ERROR (aesara.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", - "ERROR (aesara.graph.opt): TRACEBACK:\n", - "ERROR (aesara.graph.opt): Traceback (most recent call last):\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1992, in process_node\n", + "ERROR (pytensor.graph.opt): Optimization failure due to: transform_values\n", + "ERROR (pytensor.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", + "ERROR (pytensor.graph.opt): TRACEBACK:\n", + "ERROR (pytensor.graph.opt): Traceback (most recent call last):\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1992, in process_node\n", " replacements = lopt.transform(fgraph, node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1203, in transform\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1203, in transform\n", " return self.fn(fgraph, node)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 148, in transform_values\n", " new_value_var = transformed_variable(\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 286, in __call__\n", + " File 
\"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 286, in __call__\n", " compute_test_value(node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 128, in compute_test_value\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 128, in compute_test_value\n", " required = thunk()\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 508, in rval\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 508, in rval\n", " r = p(n, [x[0] for x in i], o)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 48, in perform\n", " raise NotImplementedError(\n", "NotImplementedError: These `Op`s should be removed from graphs used for computation.\n", "\n", - "ERROR (aesara.graph.opt): Optimization failure due to: transform_values\n", - "ERROR (aesara.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", - "ERROR (aesara.graph.opt): TRACEBACK:\n", - "ERROR (aesara.graph.opt): Traceback (most recent call last):\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1992, in process_node\n", + "ERROR (pytensor.graph.opt): Optimization failure due to: transform_values\n", + "ERROR (pytensor.graph.opt): node: uniform_rv{0, (0, 0), floatX, False}(RandomStateSharedVariable(), TensorConstant{[]}, TensorConstant{11}, TensorConstant{-10.0}, TensorConstant{10.0})\n", + "ERROR (pytensor.graph.opt): TRACEBACK:\n", + "ERROR (pytensor.graph.opt): Traceback (most recent call last):\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1992, in process_node\n", " replacements = lopt.transform(fgraph, node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\opt.py\", line 1203, in transform\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\opt.py\", line 1203, in transform\n", " return self.fn(fgraph, node)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 148, in transform_values\n", " new_value_var = transformed_variable(\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 286, in __call__\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 286, in __call__\n", " compute_test_value(node)\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 128, in compute_test_value\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 128, in compute_test_value\n", " required = thunk()\n", - " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aesara\\graph\\op.py\", line 508, in rval\n", + " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\pytensor\\graph\\op.py\", line 508, in rval\n", " r = p(n, [x[0] for x in i], o)\n", " File \"C:\\Users\\zufal\\miniconda3\\envs\\pm3v4\\lib\\site-packages\\aeppl\\transforms.py\", line 48, in perform\n", " raise NotImplementedError(\n", @@ -944,12 +944,12 @@ ], 
"source": [ "# test the gradient Op by direct call\n", - "aesara.config.compute_test_value = \"ignore\"\n", - "aesara.config.exception_verbosity = \"high\"\n", + "pytensor.config.compute_test_value = \"ignore\"\n", + "pytensor.config.exception_verbosity = \"high\"\n", "\n", "var = at.dvector()\n", "test_grad_op = LogLikeGrad(data, x, sigma)\n", - "test_grad_op_func = aesara.function([var], test_grad_op(var))\n", + "test_grad_op_func = pytensor.function([var], test_grad_op(var))\n", "grad_vals = test_grad_op_func([mtrue, ctrue])\n", "\n", "print(f'Gradient returned by \"LogLikeGrad\": {grad_vals}')\n", @@ -957,7 +957,7 @@ "# test the gradient called through LogLikeWithGrad\n", "test_gradded_op = LogLikeWithGrad(my_loglike, data, x, sigma)\n", "test_gradded_op_grad = at.grad(test_gradded_op(var), var)\n", - "test_gradded_op_grad_func = aesara.function([var], test_gradded_op_grad)\n", + "test_gradded_op_grad_func = pytensor.function([var], test_gradded_op_grad)\n", "grad_vals_2 = test_gradded_op_grad_func([mtrue, ctrue])\n", "\n", "print(f'Gradient returned by \"LogLikeWithGrad\": {grad_vals_2}')\n", @@ -990,7 +990,7 @@ "source": [ "## Authors\n", "\n", - "* Adapted from [Jørgen Midtbø](https://github.com/jorgenem/)'s [example](https://discourse.pymc.io/t/connecting-pymc-to-external-code-help-with-understanding-aesara-custom-ops/670) by Matt Pitkin both as a [blogpost](http://mattpitkin.github.io/samplers-demo/pages/pymc-blackbox-likelihood/) and as an example notebook to this gallery in August, 2018 ([pymc#3169](https://github.com/pymc-devs/pymc/pull/3169) and [pymc#3177](https://github.com/pymc-devs/pymc/pull/3177))\n", + "* Adapted from [Jørgen Midtbø](https://github.com/jorgenem/)'s [example](https://discourse.pymc.io/t/connecting-pymc-to-external-code-help-with-understanding-pytensor-custom-ops/670) by Matt Pitkin both as a [blogpost](http://mattpitkin.github.io/samplers-demo/pages/pymc-blackbox-likelihood/) and as an example notebook to this gallery in August, 2018 ([pymc#3169](https://github.com/pymc-devs/pymc/pull/3169) and [pymc#3177](https://github.com/pymc-devs/pymc/pull/3177))\n", "* Updated by [Oriol Abril](https://github.com/OriolAbril) on December 2021 to drop the Cython dependency from the original notebook and use numpy instead ([pymc-examples#28](https://github.com/pymc-devs/pymc-examples/pull/28))" ] }, @@ -1027,7 +1027,7 @@ "\n", "numpy : 1.21.1\n", "arviz : 0.11.4\n", - "aesara : 2.3.2\n", + "pytensor : 2.3.2\n", "matplotlib: 3.4.2\n", "pymc : 4.0.0b1\n", "IPython : 7.30.1\n", diff --git a/examples/case_studies/blackbox_external_likelihood_numpy.myst.md b/examples/case_studies/blackbox_external_likelihood_numpy.myst.md index 9e100178d..6b83a6984 100644 --- a/examples/case_studies/blackbox_external_likelihood_numpy.myst.md +++ b/examples/case_studies/blackbox_external_likelihood_numpy.myst.md @@ -25,14 +25,14 @@ uses numpy whereas {ref}`this other one ` uses Cyt ::: ```{code-cell} ipython3 -import aesara -import aesara.tensor as at import arviz as az import IPython import matplotlib import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor +import pytensor.tensor as at print(f"Running on PyMC v{pm.__version__}") ``` @@ -60,9 +60,9 @@ with pm.Model(): m = my_external_func(a, b) # <--- this is not going to work! 
``` -Another issue is that if you want to be able to use the gradient-based step samplers like {class}`pymc.NUTS` and {class}`Hamiltonian Monte Carlo (HMC) `, then your model/likelihood needs a gradient to be defined. If you have a model that is defined as a set of Aesara operators then this is no problem - internally it will be able to do automatic differentiation - but if your model is essentially a "black box" then you won't necessarily know what the gradients are. +Another issue is that if you want to be able to use the gradient-based step samplers like {class}`pymc.NUTS` and {class}`Hamiltonian Monte Carlo (HMC) `, then your model/likelihood needs a gradient to be defined. If you have a model that is defined as a set of PyTensor operators then this is no problem - internally it will be able to do automatic differentiation - but if your model is essentially a "black box" then you won't necessarily know what the gradients are. -Defining a model/likelihood that PyMC can use and that calls your "black box" function is possible, but it relies on creating a [custom Aesara Op](https://docs.pymc.io/advanced_aesara.html#writing-custom-aesara-ops). This is, hopefully, a clear description of how to do this, including one way of writing a gradient function that could be generally applicable. +Defining a model/likelihood that PyMC can use and that calls your "black box" function is possible, but it relies on creating a [custom PyTensor Op](https://docs.pymc.io/advanced_pytensor.html#writing-custom-pytensor-ops). This is, hopefully, a clear description of how to do this, including one way of writing a gradient function that could be generally applicable. In the examples below, we create a very simple model and log-likelihood function in numpy. @@ -106,16 +106,16 @@ But, this will give an error like: ValueError: setting an array element with a sequence. ``` -This is because `m` and `c` are Aesara tensor-type objects. +This is because `m` and `c` are PyTensor tensor-type objects. -So, what we actually need to do is create a [Aesara Op](http://deeplearning.net/software/aesara/extending/extending_aesara.html). This will be a new class that wraps our log-likelihood function (or just our model function, if that is all that is required) into something that can take in Aesara tensor objects, but internally can cast them as floating point values that can be passed to our log-likelihood function. We will do this below, initially without defining a [grad() method](http://deeplearning.net/software/aesara/extending/op.html#grad) for the Op. +So, what we actually need to do is create a [PyTensor Op](http://deeplearning.net/software/pytensor/extending/extending_pytensor.html). This will be a new class that wraps our log-likelihood function (or just our model function, if that is all that is required) into something that can take in PyTensor tensor objects, but internally can cast them as floating point values that can be passed to our log-likelihood function. We will do this below, initially without defining a [grad() method](http://deeplearning.net/software/pytensor/extending/op.html#grad) for the Op. +++ -## Aesara Op without grad +## PyTensor Op without grad ```{code-cell} ipython3 -# define a aesara Op for our likelihood function +# define a pytensor Op for our likelihood function class LogLike(at.Op): """ @@ -201,9 +201,9 @@ with pm.Model(): az.plot_trace(idata_mh, lines=[("m", {}, mtrue), ("c", {}, ctrue)]); ``` -## Aesara Op with grad +## PyTensor Op with grad -What if we wanted to use NUTS or HMC? 
If we knew the analytical derivatives of the model/likelihood function then we could add a [grad() method](http://deeplearning.net/software/aesara/extending/op.html#grad) to the Op using that analytical form. +What if we wanted to use NUTS or HMC? If we knew the analytical derivatives of the model/likelihood function then we could add a [grad() method](http://deeplearning.net/software/pytensor/extending/op.html#grad) to the Op using that analytical form. But, what if we don't know the analytical form. If our model/likelihood is purely Python and made up of standard maths operators and Numpy functions, then the [autograd](https://github.com/HIPS/autograd) module could potentially be used to find gradients (also, see [here](https://github.com/ActiveState/code/blob/master/recipes/Python/580610_Auto_differentiation/recipe-580610.py) for a nice Python example of automatic differentiation). But, if our model/likelihood truly is a "black box" then we can just use the good-old-fashioned [finite difference](https://en.wikipedia.org/wiki/Finite_difference) to find the gradients - this can be slow, especially if there are a large number of variables, or the model takes a long time to evaluate. Below, a function to find gradients has been defined that uses the finite difference (the central difference) - it uses an iterative method with successively smaller interval sizes to check that the gradient converges. But, you could do something far simpler and just use, for example, the SciPy [approx_fprime](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.approx_fprime.html) function. @@ -241,10 +241,10 @@ def normal_gradients(theta, x, data, sigma): So, now we can just redefine our Op with a `grad()` method, right? -It's not quite so simple! The `grad()` method itself requires that its inputs are Aesara tensor variables, whereas our `gradients` function above, like our `my_loglike` function, wants a list of floating point values. So, we need to define another Op that calculates the gradients. Below, I define a new version of the `LogLike` Op, called `LogLikeWithGrad` this time, that has a `grad()` method. This is followed by anothor Op called `LogLikeGrad` that, when called with a vector of Aesara tensor variables, returns another vector of values that are the gradients (i.e., the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)) of our log-likelihood function at those values. Note that the `grad()` method itself does not return the gradients directly, but instead returns the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)-vector product (you can hopefully just copy what I've done and not worry about what this means too much!). +It's not quite so simple! The `grad()` method itself requires that its inputs are PyTensor tensor variables, whereas our `gradients` function above, like our `my_loglike` function, wants a list of floating point values. So, we need to define another Op that calculates the gradients. Below, I define a new version of the `LogLike` Op, called `LogLikeWithGrad` this time, that has a `grad()` method. This is followed by anothor Op called `LogLikeGrad` that, when called with a vector of PyTensor tensor variables, returns another vector of values that are the gradients (i.e., the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)) of our log-likelihood function at those values. 
Note that the `grad()` method itself does not return the gradients directly, but instead returns the [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant)-vector product (you can hopefully just copy what I've done and not worry about what this means too much!). ```{code-cell} ipython3 -# define a aesara Op for our likelihood function +# define a pytensor Op for our likelihood function class LogLikeWithGrad(at.Op): itypes = [at.dvector] # expects a vector of parameter values when called @@ -409,16 +409,16 @@ pair_kwargs["marginal_kwargs"]["color"] = "C2" az.plot_pair(idata, **pair_kwargs, ax=ax); ``` -We can now check that the gradient Op works as expected. First, just create and call the `LogLikeGrad` class, which should return the gradient directly (note that we have to create a [Aesara function](http://deeplearning.net/software/aesara/library/compile/function.html) to convert the output of the Op to an array). Secondly, we call the gradient from `LogLikeWithGrad` by using the [Aesara tensor gradient](http://deeplearning.net/software/aesara/library/gradient.html#aesara.gradient.grad) function. Finally, we will check the gradient returned by the PyMC model for a Normal distribution, which should be the same as the log-likelihood function we defined. In all cases we evaluate the gradients at the true values of the model function (the straight line) that was created. +We can now check that the gradient Op works as expected. First, just create and call the `LogLikeGrad` class, which should return the gradient directly (note that we have to create a [PyTensor function](http://deeplearning.net/software/pytensor/library/compile/function.html) to convert the output of the Op to an array). Secondly, we call the gradient from `LogLikeWithGrad` by using the [PyTensor tensor gradient](http://deeplearning.net/software/pytensor/library/gradient.html#pytensor.gradient.grad) function. Finally, we will check the gradient returned by the PyMC model for a Normal distribution, which should be the same as the log-likelihood function we defined. In all cases we evaluate the gradients at the true values of the model function (the straight line) that was created. ```{code-cell} ipython3 # test the gradient Op by direct call -aesara.config.compute_test_value = "ignore" -aesara.config.exception_verbosity = "high" +pytensor.config.compute_test_value = "ignore" +pytensor.config.exception_verbosity = "high" var = at.dvector() test_grad_op = LogLikeGrad(data, x, sigma) -test_grad_op_func = aesara.function([var], test_grad_op(var)) +test_grad_op_func = pytensor.function([var], test_grad_op(var)) grad_vals = test_grad_op_func([mtrue, ctrue]) print(f'Gradient returned by "LogLikeGrad": {grad_vals}') @@ -426,7 +426,7 @@ print(f'Gradient returned by "LogLikeGrad": {grad_vals}') # test the gradient called through LogLikeWithGrad test_gradded_op = LogLikeWithGrad(my_loglike, data, x, sigma) test_gradded_op_grad = at.grad(test_gradded_op(var), var) -test_gradded_op_grad_func = aesara.function([var], test_gradded_op_grad) +test_gradded_op_grad_func = pytensor.function([var], test_gradded_op_grad) grad_vals_2 = test_gradded_op_grad_func([mtrue, ctrue]) print(f'Gradient returned by "LogLikeWithGrad": {grad_vals_2}') @@ -452,7 +452,7 @@ We could also do some profiling to compare performance between implementations. 
## Authors -* Adapted from [Jørgen Midtbø](https://github.com/jorgenem/)'s [example](https://discourse.pymc.io/t/connecting-pymc-to-external-code-help-with-understanding-aesara-custom-ops/670) by Matt Pitkin both as a [blogpost](http://mattpitkin.github.io/samplers-demo/pages/pymc-blackbox-likelihood/) and as an example notebook to this gallery in August, 2018 ([pymc#3169](https://github.com/pymc-devs/pymc/pull/3169) and [pymc#3177](https://github.com/pymc-devs/pymc/pull/3177)) +* Adapted from [Jørgen Midtbø](https://github.com/jorgenem/)'s [example](https://discourse.pymc.io/t/connecting-pymc-to-external-code-help-with-understanding-pytensor-custom-ops/670) by Matt Pitkin both as a [blogpost](http://mattpitkin.github.io/samplers-demo/pages/pymc-blackbox-likelihood/) and as an example notebook to this gallery in August, 2018 ([pymc#3169](https://github.com/pymc-devs/pymc/pull/3169) and [pymc#3177](https://github.com/pymc-devs/pymc/pull/3177)) * Updated by [Oriol Abril](https://github.com/OriolAbril) on December 2021 to drop the Cython dependency from the original notebook and use numpy instead ([pymc-examples#28](https://github.com/pymc-devs/pymc-examples/pull/28)) +++ diff --git a/examples/case_studies/factor_analysis.ipynb b/examples/case_studies/factor_analysis.ipynb index 7d73bd7ac..cc019c28f 100644 --- a/examples/case_studies/factor_analysis.ipynb +++ b/examples/case_studies/factor_analysis.ipynb @@ -43,11 +43,11 @@ } ], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import scipy as sp\n", "import seaborn as sns\n", "import xarray as xr\n", @@ -205,8 +205,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [W, F, psi]\n" ] @@ -335,18 +335,18 @@ "metadata": {}, "outputs": [], "source": [ - "def expand_packed_block_triangular(d, k, packed, diag=None, mtype=\"aesara\"):\n", + "def expand_packed_block_triangular(d, k, packed, diag=None, mtype=\"pytensor\"):\n", " # like expand_packed_triangular, but with d > k.\n", - " assert mtype in {\"aesara\", \"numpy\"}\n", + " assert mtype in {\"pytensor\", \"numpy\"}\n", " assert d >= k\n", "\n", " def set_(M, i_, v_):\n", - " if mtype == \"aesara\":\n", + " if mtype == \"pytensor\":\n", " return at.set_subtensor(M[i_], v_)\n", " M[i_] = v_\n", " return M\n", "\n", - " out = at.zeros((d, k), dtype=float) if mtype == \"aesara\" else np.zeros((d, k), dtype=float)\n", + " out = at.zeros((d, k), dtype=float) if mtype == \"pytensor\" else np.zeros((d, k), dtype=float)\n", " if diag is None:\n", " idxs = np.tril_indices(d, m=k)\n", " out = set_(out, idxs, packed)\n", @@ -401,8 +401,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [W_z, W_b, F, psi]\n" ] @@ -516,8 +516,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n" + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n" ] }, { @@ -785,7 +785,7 @@ "pymc : 4.0.0b6\n", "matplotlib : 3.5.1\n", "arviz : 0.12.0\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "scipy : 1.8.0\n", "xarray : 2022.3.0\n", "seaborn : 0.11.2\n", diff --git a/examples/case_studies/factor_analysis.myst.md b/examples/case_studies/factor_analysis.myst.md index 1b27e3a4e..cc811b814 100644 --- a/examples/case_studies/factor_analysis.myst.md +++ b/examples/case_studies/factor_analysis.myst.md @@ -32,11 +32,11 @@ Factor analysis is a widely used probabilistic model for identifying low-rank st ::: ```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib import numpy as np import pymc as pm +import pytensor.tensor as at import scipy as sp import seaborn as sns import xarray as xr @@ -148,18 +148,18 @@ This can be fixed by constraining the form of W to be: We can adapt `expand_block_triangular` to fill out a non-square matrix. This function mimics `pm.expand_packed_triangular`, but while the latter only works on packed versions of square matrices (i.e. $d=k$ in our model, the former can also be used with nonsquare matrices. ```{code-cell} ipython3 -def expand_packed_block_triangular(d, k, packed, diag=None, mtype="aesara"): +def expand_packed_block_triangular(d, k, packed, diag=None, mtype="pytensor"): # like expand_packed_triangular, but with d > k. - assert mtype in {"aesara", "numpy"} + assert mtype in {"pytensor", "numpy"} assert d >= k def set_(M, i_, v_): - if mtype == "aesara": + if mtype == "pytensor": return at.set_subtensor(M[i_], v_) M[i_] = v_ return M - out = at.zeros((d, k), dtype=float) if mtype == "aesara" else np.zeros((d, k), dtype=float) + out = at.zeros((d, k), dtype=float) if mtype == "pytensor" else np.zeros((d, k), dtype=float) if diag is None: idxs = np.tril_indices(d, m=k) out = set_(out, idxs, packed) diff --git a/examples/case_studies/hierarchical_partial_pooling.ipynb b/examples/case_studies/hierarchical_partial_pooling.ipynb index 51ddcbf94..146f5317c 100644 --- a/examples/case_studies/hierarchical_partial_pooling.ipynb +++ b/examples/case_studies/hierarchical_partial_pooling.ipynb @@ -60,12 +60,12 @@ }, "outputs": [], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "\n", "%matplotlib inline" ] @@ -599,7 +599,7 @@ "arviz : 0.12.1\n", "sys : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:04:59) [GCC 10.3.0]\n", "pymc : 4.1.2\n", - "aesara : 2.7.5\n", + "pytensor : 2.7.5\n", "\n", "Watermark: 2.3.1\n", "\n" diff --git a/examples/case_studies/hierarchical_partial_pooling.myst.md b/examples/case_studies/hierarchical_partial_pooling.myst.md index 91b405387..c69bde334 100644 --- a/examples/case_studies/hierarchical_partial_pooling.myst.md +++ b/examples/case_studies/hierarchical_partial_pooling.myst.md @@ -50,12 +50,12 @@ The idea of hierarchical partial pooling is to model the global performance, and For far more in-depth discussion please refer to Stan [tutorial](http://mc-stan.org/documentation/case-studies/pool-binary-trials.html) {cite:p}`carpenter2016hierarchical` on the subject. The model and parameter values were taken from that example. 
```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at %matplotlib inline ``` diff --git a/examples/case_studies/item_response_nba.ipynb b/examples/case_studies/item_response_nba.ipynb index af223156e..8769a2a65 100644 --- a/examples/case_studies/item_response_nba.ipynb +++ b/examples/case_studies/item_response_nba.ipynb @@ -1155,7 +1155,7 @@ "Python version : 3.9.12\n", "IPython version : 8.2.0\n", "\n", - "aesara: 2.6.2\n", + "pytensor: 2.6.2\n", "aeppl : 0.0.28\n", "xarray: 2022.3.0\n", "\n", @@ -1172,7 +1172,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/case_studies/item_response_nba.myst.md b/examples/case_studies/item_response_nba.myst.md index 73fa3ae0b..8261e8fff 100644 --- a/examples/case_studies/item_response_nba.myst.md +++ b/examples/case_studies/item_response_nba.myst.md @@ -521,7 +521,7 @@ A warm thank you goes to [Eric Ma](https://github.com/ericmjl) for many useful c :tags: [] %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/case_studies/mediation_analysis.ipynb b/examples/case_studies/mediation_analysis.ipynb index aa2f5d722..8f5614fed 100644 --- a/examples/case_studies/mediation_analysis.ipynb +++ b/examples/case_studies/mediation_analysis.ipynb @@ -815,7 +815,7 @@ "Python version : 3.9.9\n", "IPython version : 7.31.0\n", "\n", - "aesara: 2.3.8\n", + "pytensor: 2.3.8\n", "aeppl : 0.0.18\n", "xarray: 0.20.2\n", "\n", @@ -832,7 +832,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/case_studies/mediation_analysis.myst.md b/examples/case_studies/mediation_analysis.myst.md index 26bcd34b6..10c9795de 100644 --- a/examples/case_studies/mediation_analysis.myst.md +++ b/examples/case_studies/mediation_analysis.myst.md @@ -233,7 +233,7 @@ As stated at the outset, the procedures used in mediation analysis have evolved ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/case_studies/moderation_analysis.ipynb b/examples/case_studies/moderation_analysis.ipynb index 045e7ed28..ad7635dda 100644 --- a/examples/case_studies/moderation_analysis.ipynb +++ b/examples/case_studies/moderation_analysis.ipynb @@ -859,7 +859,7 @@ "Python version : 3.9.9\n", "IPython version : 7.31.0\n", "\n", - "aesara: 2.5.1\n", + "pytensor: 2.5.1\n", "aeppl : 0.0.27\n", "xarray: 0.20.2\n", "\n", @@ -877,7 +877,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/case_studies/moderation_analysis.myst.md b/examples/case_studies/moderation_analysis.myst.md index e918436c7..6ebd97c8a 100644 --- a/examples/case_studies/moderation_analysis.myst.md +++ b/examples/case_studies/moderation_analysis.myst.md @@ -375,7 +375,7 @@ But readers are strongly encouraged to read {cite:t}`mcclelland2017multicollinea ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray 
+%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/case_studies/multilevel_modeling.ipynb b/examples/case_studies/multilevel_modeling.ipynb index 0f498c962..c573613f6 100644 --- a/examples/case_studies/multilevel_modeling.ipynb +++ b/examples/case_studies/multilevel_modeling.ipynb @@ -85,12 +85,12 @@ "import os\n", "import warnings\n", "\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import seaborn as sns\n", "import xarray as xr\n", "\n", @@ -3958,7 +3958,7 @@ "pandas : 1.5.2\n", "seaborn : 0.12.1\n", "numpy : 1.21.6\n", - "aesara : 2.8.7\n", + "pytensor : 2.8.7\n", "\n", "Watermark: 2.3.0\n", "\n" diff --git a/examples/case_studies/multilevel_modeling.myst.md b/examples/case_studies/multilevel_modeling.myst.md index da9bd5c17..822a2bf33 100644 --- a/examples/case_studies/multilevel_modeling.myst.md +++ b/examples/case_studies/multilevel_modeling.myst.md @@ -65,12 +65,12 @@ First, we import the data from a local file, and extract Minnesota's data. import os import warnings -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at import seaborn as sns import xarray as xr diff --git a/examples/case_studies/probabilistic_matrix_factorization.ipynb b/examples/case_studies/probabilistic_matrix_factorization.ipynb index a99085379..7aa735f3f 100644 --- a/examples/case_studies/probabilistic_matrix_factorization.ipynb +++ b/examples/case_studies/probabilistic_matrix_factorization.ipynb @@ -827,12 +827,12 @@ "import logging\n", "import time\n", "\n", - "import aesara\n", + "import pytensor\n", "import scipy as sp\n", "\n", "# Enable on-the-fly graph computations, but ignore\n", "# absence of intermediate test values.\n", - "aesara.config.compute_test_value = \"ignore\"\n", + "pytensor.config.compute_test_value = \"ignore\"\n", "\n", "# Set up logging.\n", "logger = logging.getLogger()\n", @@ -1360,8 +1360,8 @@ "INFO:pymc:Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", "INFO:pymc:Initializing NUTS using jitter+adapt_diag...\n", - "/Users/severinhatt/miniconda3/envs/pymc-hack/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/severinhatt/miniconda3/envs/pymc-hack/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [U, V]\n", @@ -1721,7 +1721,7 @@ "Python version : 3.9.7\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.5.1\n", + "pytensor: 2.5.1\n", "aeppl : 0.0.27\n", "xarray: 2022.3.0\n", "\n", @@ -1731,7 +1731,7 @@ "scipy : 1.7.3\n", "matplotlib: 3.5.2\n", "logging : 0.5.1.2\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "numpy : 1.22.4\n", "xarray : 2022.3.0\n", "\n", @@ -1742,7 +1742,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/case_studies/probabilistic_matrix_factorization.myst.md b/examples/case_studies/probabilistic_matrix_factorization.myst.md index d46d526ef..6881087fd 100644 --- a/examples/case_studies/probabilistic_matrix_factorization.myst.md +++ b/examples/case_studies/probabilistic_matrix_factorization.myst.md @@ -302,12 +302,12 @@ Given small precision parameters, the priors on $U$ and $V$ ensure our latent va import logging import time -import aesara +import pytensor import scipy as sp # Enable on-the-fly graph computations, but ignore # absence of intermediate test values. -aesara.config.compute_test_value = "ignore" +pytensor.config.compute_test_value = "ignore" # Set up logging. logger = logging.getLogger() @@ -775,7 +775,7 @@ goldberg2001eigentaste ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/case_studies/putting_workflow.ipynb b/examples/case_studies/putting_workflow.ipynb index 62542ac3d..6735c9bff 100644 --- a/examples/case_studies/putting_workflow.ipynb +++ b/examples/case_studies/putting_workflow.ipynb @@ -37,12 +37,12 @@ "source": [ "import io\n", "\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import scipy\n", "import scipy.stats as st\n", "import xarray as xr\n", @@ -333,8 +333,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [a, b]\n" ] @@ -841,8 +841,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n" + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n" ] }, { @@ -1528,8 +1528,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [variance_of_shot, variance_of_distance]\n" ] @@ -1958,8 +1958,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [variance_of_shot, variance_of_distance, dispersion]\n" ] @@ -2376,7 +2376,7 @@ "xarray_einstats: 0.2.0\n", "\n", "xarray : 2022.3.0\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "pymc : 4.0.0b6\n", "arviz : 0.11.4\n", "pandas : 1.4.1\n", diff --git a/examples/case_studies/putting_workflow.myst.md b/examples/case_studies/putting_workflow.myst.md index 26fc9fdcb..2d63e9a39 100644 --- a/examples/case_studies/putting_workflow.myst.md +++ b/examples/case_studies/putting_workflow.myst.md @@ -32,12 +32,12 @@ We use a data set from "Statistics: A Bayesian Perspective" {cite:p}`berry1996st ```{code-cell} ipython3 import io -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at import scipy import scipy.stats as st import xarray as xr diff --git a/examples/case_studies/reinforcement_learning.ipynb b/examples/case_studies/reinforcement_learning.ipynb index cce994061..19b12631b 100644 --- a/examples/case_studies/reinforcement_learning.ipynb +++ b/examples/case_studies/reinforcement_learning.ipynb @@ -10,7 +10,7 @@ "# Fitting a Reinforcement Learning Model to Behavioral Data with PyMC\n", "\n", ":::{post} Aug 5, 2022\n", - ":tags: Aesara, Reinforcement Learning\n", + ":tags: PyTensor, Reinforcement Learning\n", ":category: advanced, how-to\n", ":author: Ricardo Vieira\n", ":::\n", @@ -48,12 +48,12 @@ }, "outputs": [], "source": [ - "import aesara\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "import scipy\n", "\n", "from matplotlib.lines import Line2D" @@ -223,7 +223,7 @@ "\n", "I employ the handy scipy.optimize.minimize function, to quickly retrieve the values of $\\alpha$ and $\\beta$ that maximize the likelihood of the data (or actually, minimize the negative log likelihood).\n", "\n", - "This was also helpful when I later wrote the Aesara function that computed the choice probabilities in PyMC. First, the underlying logic is the same, the only thing that changes is the syntax. Second, it provides a way to be confident that I did not mess up, and what I was actually computing was what I intended to." + "This was also helpful when I later wrote the PyTensor function that computed the choice probabilities in PyMC. First, the underlying logic is the same, the only thing that changes is the syntax. Second, it provides a way to be confident that I did not mess up, and what I was actually computing was what I intended to." 
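As a rough sketch of this kind of MLE check, assuming a plain NumPy negative log-likelihood `llik_td(x, *args)` over the simulated `actions` and `rewards` arrays (the helper name, starting point and optimizer choice are illustrative):

```python
import numpy as np
import scipy.optimize

# Initial guesses for alpha (learning rate) and beta (inverse temperature).
x0 = np.array([0.5, 1.0])

# Minimizing the negative log-likelihood is the same as maximizing the likelihood.
result = scipy.optimize.minimize(
    llik_td,                  # assumed negative log-likelihood of (alpha, beta)
    x0,
    args=(actions, rewards),  # simulated choices and rewards
    method="Nelder-Mead",     # derivative-free; fine for a 2-parameter problem
)

print("MLE alpha, beta:", result.x)
```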
] }, { @@ -270,7 +270,7 @@ "\n", "In the end, the function returns the negative sum of all the log probabilities, which is equivalent to multiplying the probabilities in their original scale.\n", "\n", - "(The first action is ignored just to make the output comparable to the later Aesara function. It doesn't actually change any estimation, as the initial probabilities are fixed and do not depend on either the $\\alpha$ or $\\beta$ parameters.)" + "(The first action is ignored just to make the output comparable to the later PyTensor function. It doesn't actually change any estimation, as the initial probabilities are fixed and do not depend on either the $\\alpha$ or $\\beta$ parameters.)" ] }, { @@ -361,7 +361,7 @@ "source": [ "The estimated MLE values are relatively close to the true ones. However, this procedure does not give any idea of the plausible uncertainty around these parameter values. To get that, I'll turn to PyMC for a bayesian posterior estimation.\n", "\n", - "But before that, I will implement a simple vectorization optimization to the log-likelihood function that will be more similar to the Aesara counterpart. The reason for this is to speed up the slow bayesian inference engine down the road." + "But before that, I will implement a simple vectorization optimization to the log-likelihood function that will be more similar to the PyTensor counterpart. The reason for this is to speed up the slow bayesian inference engine down the road." ] }, { @@ -478,7 +478,7 @@ "source": [ "The vectorized function gives the same results, but runs almost one order of magnitude faster. \n", "\n", - "When implemented as an Aesara function, the difference between the vectorized and standard versions was not this drastic. Still, it ran twice as fast, which meant the model also sampled at twice the speed it would otherwise have!" + "When implemented as an PyTensor function, the difference between the vectorized and standard versions was not this drastic. Still, it ran twice as fast, which meant the model also sampled at twice the speed it would otherwise have!" ] }, { @@ -489,7 +489,7 @@ "source": [ "## Estimating the learning parameters via PyMC\n", "\n", - "The most challenging part was to create an Aesara function/loop to estimate the Q values when sampling our parameters with PyMC." + "The most challenging part was to create an PyTensor function/loop to estimate the Q values when sampling our parameters with PyMC." 
] }, { @@ -503,8 +503,8 @@ "def update_Q(action, reward, Qs, alpha):\n", " \"\"\"\n", " This function updates the Q table according to the RL update rule.\n", - " It will be called by aesara.scan to do so recursevely, given the observed data and the alpha parameter\n", - " This could have been replaced be the following lamba expression in the aesara.scan fn argument:\n", + " It will be called by pytensor.scan to do so recursevely, given the observed data and the alpha parameter\n", + " This could have been replaced be the following lamba expression in the pytensor.scan fn argument:\n", " fn=lamba action, reward, Qs, alpha: at.set_subtensor(Qs[action], Qs[action] + alpha * (reward - Qs[action]))\n", " \"\"\"\n", "\n", @@ -520,7 +520,7 @@ }, "outputs": [], "source": [ - "# Transform the variables into appropriate Aesara objects\n", + "# Transform the variables into appropriate PyTensor objects\n", "rewards_ = at.as_tensor_variable(rewards, dtype=\"int32\")\n", "actions_ = at.as_tensor_variable(actions, dtype=\"int32\")\n", "\n", @@ -531,7 +531,7 @@ "Qs = 0.5 * at.ones((2,), dtype=\"float64\")\n", "\n", "# Compute the Q values for each trial\n", - "Qs, _ = aesara.scan(\n", + "Qs, _ = pytensor.scan(\n", " fn=update_Q, sequences=[actions_, rewards_], outputs_info=[Qs], non_sequences=[alpha]\n", ")\n", "\n", @@ -577,10 +577,10 @@ } ], "source": [ - "aesara_llik_td = aesara.function(\n", + "pytensor_llik_td = pytensor.function(\n", " inputs=[alpha, beta], outputs=neg_loglike, on_unused_input=\"ignore\"\n", ")\n", - "result = aesara_llik_td(true_alpha, true_beta)\n", + "result = pytensor_llik_td(true_alpha, true_beta)\n", "float(result)" ] }, @@ -590,7 +590,7 @@ "id": "AmcoU1CF5ix-" }, "source": [ - "The same result is obtained, so we can be confident that the Aesara loop is working as expected. We are now ready to implement the PyMC model." + "The same result is obtained, so we can be confident that the PyTensor loop is working as expected. We are now ready to implement the PyMC model." 
] }, { @@ -601,13 +601,13 @@ }, "outputs": [], "source": [ - "def aesara_llik_td(alpha, beta, actions, rewards):\n", + "def pytensor_llik_td(alpha, beta, actions, rewards):\n", " rewards = at.as_tensor_variable(rewards, dtype=\"int32\")\n", " actions = at.as_tensor_variable(actions, dtype=\"int32\")\n", "\n", " # Compute the Qs values\n", " Qs = 0.5 * at.ones((2,), dtype=\"float64\")\n", - " Qs, updates = aesara.scan(\n", + " Qs, updates = pytensor.scan(\n", " fn=update_Q, sequences=[actions, rewards], outputs_info=[Qs], non_sequences=[alpha]\n", " )\n", "\n", @@ -699,7 +699,7 @@ " alpha = pm.Beta(name=\"alpha\", alpha=1, beta=1)\n", " beta = pm.HalfNormal(name=\"beta\", sigma=10)\n", "\n", - " like = pm.Potential(name=\"like\", var=aesara_llik_td(alpha, beta, actions, rewards))\n", + " like = pm.Potential(name=\"like\", var=pytensor_llik_td(alpha, beta, actions, rewards))\n", "\n", " tr = pm.sample(random_seed=rng)" ] @@ -804,7 +804,7 @@ "\n", " # Compute the Qs values\n", " Qs = 0.5 * at.ones((2,), dtype=\"float64\")\n", - " Qs, updates = aesara.scan(\n", + " Qs, updates = pytensor.scan(\n", " fn=update_Q, sequences=[actions, rewards], outputs_info=[Qs], non_sequences=[alpha]\n", " )\n", "\n", @@ -985,7 +985,7 @@ "aeppl : 0.0.34\n", "xarray: 2022.6.0\n", "\n", - "aesara : 2.7.9\n", + "pytensor : 2.7.9\n", "arviz : 0.12.1\n", "pymc : 4.1.5\n", "sys : 3.9.13 (main, May 24 2022, 21:28:31) \n", diff --git a/examples/case_studies/reinforcement_learning.myst.md b/examples/case_studies/reinforcement_learning.myst.md index 88117ce46..0cfd6fae7 100644 --- a/examples/case_studies/reinforcement_learning.myst.md +++ b/examples/case_studies/reinforcement_learning.myst.md @@ -16,7 +16,7 @@ kernelspec: # Fitting a Reinforcement Learning Model to Behavioral Data with PyMC :::{post} Aug 5, 2022 -:tags: Aesara, Reinforcement Learning +:tags: PyTensor, Reinforcement Learning :category: advanced, how-to :author: Ricardo Vieira ::: @@ -48,12 +48,12 @@ where the $\beta \in (0, +\infty)$ parameter determines the level of noise in th ```{code-cell} ipython3 :id: QTq-0HMw7dBK -import aesara -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor +import pytensor.tensor as at import scipy from matplotlib.lines import Line2D @@ -171,7 +171,7 @@ Having generated the data, the goal is to now 'invert the model' to estimate the I employ the handy scipy.optimize.minimize function, to quickly retrieve the values of $\alpha$ and $\beta$ that maximize the likelihood of the data (or actually, minimize the negative log likelihood). -This was also helpful when I later wrote the Aesara function that computed the choice probabilities in PyMC. First, the underlying logic is the same, the only thing that changes is the syntax. Second, it provides a way to be confident that I did not mess up, and what I was actually computing was what I intended to. +This was also helpful when I later wrote the PyTensor function that computed the choice probabilities in PyMC. First, the underlying logic is the same, the only thing that changes is the syntax. Second, it provides a way to be confident that I did not mess up, and what I was actually computing was what I intended to. 
```{code-cell} ipython3 :id: lWGlRE3BjR0E @@ -208,7 +208,7 @@ The function `scipy.special.logsumexp` is used to compute the term $\log(\exp(\b In the end, the function returns the negative sum of all the log probabilities, which is equivalent to multiplying the probabilities in their original scale. -(The first action is ignored just to make the output comparable to the later Aesara function. It doesn't actually change any estimation, as the initial probabilities are fixed and do not depend on either the $\alpha$ or $\beta$ parameters.) +(The first action is ignored just to make the output comparable to the later PyTensor function. It doesn't actually change any estimation, as the initial probabilities are fixed and do not depend on either the $\alpha$ or $\beta$ parameters.) ```{code-cell} ipython3 --- @@ -247,7 +247,7 @@ print(f"MLE: beta = {result.x[1]:.2f} (true value = {true_beta})") The estimated MLE values are relatively close to the true ones. However, this procedure does not give any idea of the plausible uncertainty around these parameter values. To get that, I'll turn to PyMC for a bayesian posterior estimation. -But before that, I will implement a simple vectorization optimization to the log-likelihood function that will be more similar to the Aesara counterpart. The reason for this is to speed up the slow bayesian inference engine down the road. +But before that, I will implement a simple vectorization optimization to the log-likelihood function that will be more similar to the PyTensor counterpart. The reason for this is to speed up the slow bayesian inference engine down the road. ```{code-cell} ipython3 :id: 4knb5sKW9V66 @@ -312,13 +312,13 @@ outputId: 94bf3268-0eab-4ce9-deb9-5d1527b3c19d The vectorized function gives the same results, but runs almost one order of magnitude faster. -When implemented as an Aesara function, the difference between the vectorized and standard versions was not this drastic. Still, it ran twice as fast, which meant the model also sampled at twice the speed it would otherwise have! +When implemented as an PyTensor function, the difference between the vectorized and standard versions was not this drastic. Still, it ran twice as fast, which meant the model also sampled at twice the speed it would otherwise have! +++ {"id": "tC7xbCCIL7K4"} ## Estimating the learning parameters via PyMC -The most challenging part was to create an Aesara function/loop to estimate the Q values when sampling our parameters with PyMC. +The most challenging part was to create an PyTensor function/loop to estimate the Q values when sampling our parameters with PyMC. ```{code-cell} ipython3 :id: u8L_FAB4hle1 @@ -326,8 +326,8 @@ The most challenging part was to create an Aesara function/loop to estimate the def update_Q(action, reward, Qs, alpha): """ This function updates the Q table according to the RL update rule. 
- It will be called by aesara.scan to do so recursevely, given the observed data and the alpha parameter - This could have been replaced be the following lamba expression in the aesara.scan fn argument: + It will be called by pytensor.scan to do so recursively, given the observed data and the alpha parameter + This could have been replaced by the following lamba expression in the pytensor.scan fn argument: fn=lamba action, reward, Qs, alpha: at.set_subtensor(Qs[action], Qs[action] + alpha * (reward - Qs[action])) """ @@ -338,7 +338,7 @@ def update_Q(action, reward, Qs, alpha): ```{code-cell} ipython3 :id: dHzhTy20g4vh -# Transform the variables into appropriate Aesara objects +# Transform the variables into appropriate PyTensor objects rewards_ = at.as_tensor_variable(rewards, dtype="int32") actions_ = at.as_tensor_variable(actions, dtype="int32") @@ -349,7 +349,7 @@ beta = at.scalar("beta") Qs = 0.5 * at.ones((2,), dtype="float64") # Compute the Q values for each trial -Qs, _ = aesara.scan( +Qs, _ = pytensor.scan( fn=update_Q, sequences=[actions_, rewards_], outputs_info=[Qs], non_sequences=[alpha] ) @@ -374,27 +374,27 @@ colab: id: g1hkTd75xxwo outputId: a2310fd3-cac2-48c6-9d22-3c3b72410427 --- -aesara_llik_td = aesara.function( +pytensor_llik_td = pytensor.function( inputs=[alpha, beta], outputs=neg_loglike, on_unused_input="ignore" ) -result = aesara_llik_td(true_alpha, true_beta) +result = pytensor_llik_td(true_alpha, true_beta) float(result) ``` +++ {"id": "AmcoU1CF5ix-"} -The same result is obtained, so we can be confident that the Aesara loop is working as expected. We are now ready to implement the PyMC model. +The same result is obtained, so we can be confident that the PyTensor loop is working as expected. We are now ready to implement the PyMC model.
```{code-cell} ipython3 :id: c70L4ZBT7QLr -def aesara_llik_td(alpha, beta, actions, rewards): +def pytensor_llik_td(alpha, beta, actions, rewards): rewards = at.as_tensor_variable(rewards, dtype="int32") actions = at.as_tensor_variable(actions, dtype="int32") # Compute the Qs values Qs = 0.5 * at.ones((2,), dtype="float64") - Qs, updates = aesara.scan( + Qs, updates = pytensor.scan( fn=update_Q, sequences=[actions, rewards], outputs_info=[Qs], non_sequences=[alpha] ) @@ -419,7 +419,7 @@ with pm.Model() as m: alpha = pm.Beta(name="alpha", alpha=1, beta=1) beta = pm.HalfNormal(name="beta", sigma=10) - like = pm.Potential(name="like", var=aesara_llik_td(alpha, beta, actions, rewards)) + like = pm.Potential(name="like", var=pytensor_llik_td(alpha, beta, actions, rewards)) tr = pm.sample(random_seed=rng) ``` @@ -469,7 +469,7 @@ def right_action_probs(alpha, beta, actions, rewards): # Compute the Qs values Qs = 0.5 * at.ones((2,), dtype="float64") - Qs, updates = aesara.scan( + Qs, updates = pytensor.scan( fn=update_Q, sequences=[actions, rewards], outputs_info=[Qs], non_sequences=[alpha] ) diff --git a/examples/case_studies/rugby_analytics.ipynb b/examples/case_studies/rugby_analytics.ipynb index b3ccbbbad..7a3c05e4d 100644 --- a/examples/case_studies/rugby_analytics.ipynb +++ b/examples/case_studies/rugby_analytics.ipynb @@ -62,12 +62,12 @@ "source": [ "!date\n", "\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import seaborn as sns\n", "\n", "from matplotlib.ticker import StrMethodFormatter\n", @@ -725,8 +725,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [home, sd_att, sd_def, intercept, atts_star, defs_star]\n" ] @@ -823,7 +823,7 @@ "source": [ "* We specified the model and the likelihood function\n", "\n", - "* All this runs on an Aesara graph under the hood" + "* All this runs on a PyTensor graph under the hood" ] }, { @@ -2161,8 +2161,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got .
Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n" + "/home/oriol/miniconda3/envs/arviz/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n" ] }, { @@ -2850,7 +2850,7 @@ "pandas : 1.4.1\n", "seaborn : 0.11.2\n", "pymc : 4.0.0b6\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "arviz : 0.12.0\n", "\n", "Watermark: 2.3.0\n", diff --git a/examples/case_studies/rugby_analytics.myst.md b/examples/case_studies/rugby_analytics.myst.md index cbbd739f3..6a30835aa 100644 --- a/examples/case_studies/rugby_analytics.myst.md +++ b/examples/case_studies/rugby_analytics.myst.md @@ -47,12 +47,12 @@ Source for Results 2014 are Wikipedia. I've added the subsequent years, 2015, 20 ```{code-cell} ipython3 !date -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at import seaborn as sns from matplotlib.ticker import StrMethodFormatter @@ -275,7 +275,7 @@ with pm.Model(coords=coords) as model: * We specified the model and the likelihood function -* All this runs on an Aesara graph under the hood +* All this runs on a PyTensor graph under the hood ```{code-cell} ipython3 az.plot_trace(trace, var_names=["intercept", "home", "sd_att", "sd_def"], compact=False); diff --git a/examples/case_studies/spline.ipynb b/examples/case_studies/spline.ipynb index 538ad73b0..28662b8f6 100644 --- a/examples/case_studies/spline.ipynb +++ b/examples/case_studies/spline.ipynb @@ -1385,7 +1385,7 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.7.5\n", + "pytensor: 2.7.5\n", "xarray: 2022.3.0\n", "patsy : 0.5.2\n", "\n", @@ -1402,7 +1402,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,xarray,patsy" + "%watermark -n -u -v -iv -w -p pytensor,xarray,patsy" ] }, { diff --git a/examples/case_studies/spline.myst.md b/examples/case_studies/spline.myst.md index b6bffb2e2..0bd352db2 100644 --- a/examples/case_studies/spline.myst.md +++ b/examples/case_studies/spline.myst.md @@ -289,7 +289,7 @@ plt.fill_between( ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,xarray,patsy +%watermark -n -u -v -iv -w -p pytensor,xarray,patsy ``` :::{include} ../page_footer.md diff --git a/examples/case_studies/stochastic_volatility.ipynb b/examples/case_studies/stochastic_volatility.ipynb index eb03cc71a..131d67fc9 100644 --- a/examples/case_studies/stochastic_volatility.ipynb +++ b/examples/case_studies/stochastic_volatility.ipynb @@ -730,7 +730,7 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.6.6\n", + "pytensor: 2.6.6\n", "aeppl : 0.0.31\n", "xarray: 2022.3.0\n", "\n", @@ -747,7 +747,7 @@ ], "source": [ "%load_ext watermark\n", -
"%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/case_studies/stochastic_volatility.myst.md b/examples/case_studies/stochastic_volatility.myst.md index 141465111..980737787 100644 --- a/examples/case_studies/stochastic_volatility.myst.md +++ b/examples/case_studies/stochastic_volatility.myst.md @@ -188,7 +188,7 @@ axes[1].set_title("Posterior volatility"); ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` ## References diff --git a/examples/case_studies/wrapping_jax_function.ipynb b/examples/case_studies/wrapping_jax_function.ipynb index 617bf3f59..52e5c4b3e 100644 --- a/examples/case_studies/wrapping_jax_function.ipynb +++ b/examples/case_studies/wrapping_jax_function.ipynb @@ -8,7 +8,7 @@ "# How to wrap a JAX function for use in PyMC\n", "\n", ":::{post} Mar 24, 2022\n", - ":tags: Aesara, hidden markov model, JAX \n", + ":tags: PyTensor, hidden markov model, JAX \n", ":category: advanced, how-to\n", ":author: Ricardo Vieira\n", ":::" @@ -20,14 +20,14 @@ "metadata": {}, "outputs": [], "source": [ - "import aesara\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "\n", - "from aesara.graph import Apply, Op" + "from pytensor.graph import Apply, Op" ] }, { @@ -58,7 +58,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/ricardo/miniconda3/envs/pymc-examples/lib/python3.10/site-packages/aesara/link/jax/dispatch.py:87: UserWarning: JAX omnistaging couldn't be disabled: Disabling of omnistaging is no longer supported in JAX version 0.2.12 and higher: see https://github.com/google/jax/blob/main/design_notes/omnistaging.md.\n", + "/home/ricardo/miniconda3/envs/pymc-examples/lib/python3.10/site-packages/pytensor/link/jax/dispatch.py:87: UserWarning: JAX omnistaging couldn't be disabled: Disabling of omnistaging is no longer supported in JAX version 0.2.12 and higher: see https://github.com/google/jax/blob/main/design_notes/omnistaging.md.\n", " warnings.warn(f\"JAX omnistaging couldn't be disabled: {e}\")\n", "/home/ricardo/Documents/Projects/pymc/pymc/sampling_jax.py:36: UserWarning: This module is experimental.\n", " warnings.warn(\"This module is experimental.\")\n" @@ -71,29 +71,29 @@ "import jax.scipy as jsp\n", "import pymc.sampling_jax\n", "\n", - "from aesara.link.jax.dispatch import jax_funcify" + "from pytensor.link.jax.dispatch import jax_funcify" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Intro: Aesara and its backends\n", + "## Intro: PyTensor and its backends\n", "\n", - "PyMC uses the {doc}`Aesara ` library to create and manipulate probabilistic graphs. Aesara is backend-agnostic, meaning it can make use of functions written in different languages or frameworks, including pure Python, NumPy, C, Cython, Numba, and [JAX](https://jax.readthedocs.io/en/latest/index.html). \n", + "PyMC uses the {doc}`PyTensor ` library to create and manipulate probabilistic graphs. PyTensor is backend-agnostic, meaning it can make use of functions written in different languages or frameworks, including pure Python, NumPy, C, Cython, Numba, and [JAX](https://jax.readthedocs.io/en/latest/index.html). 
\n", "\n", - "All that is needed is to encapsulate such function in a Aesara {class}`~aesara.graph.op.Op`, which enforces a specific API regarding how inputs and outputs of pure \"operations\" should be handled. It also implements methods for optional extra functionality like symbolic shape inference and automatic differentiation. This is well covered in the Aesara {ref}`Op documentation ` and in our {ref}`blackbox_external_likelihood_numpy` pymc-example.\n", + "All that is needed is to encapsulate such function in a PyTensor {class}`~pytensor.graph.op.Op`, which enforces a specific API regarding how inputs and outputs of pure \"operations\" should be handled. It also implements methods for optional extra functionality like symbolic shape inference and automatic differentiation. This is well covered in the PyTensor {ref}`Op documentation ` and in our {ref}`blackbox_external_likelihood_numpy` pymc-example.\n", "\n", - "More recently, Aesara became capable of compiling directly to some of these languages/frameworks, meaning that we can convert a complete Aesara graph into a JAX or NUMBA jitted function, whereas traditionally they could only be converted to Python or C.\n", + "More recently, PyTensor became capable of compiling directly to some of these languages/frameworks, meaning that we can convert a complete PyTensor graph into a JAX or NUMBA jitted function, whereas traditionally they could only be converted to Python or C.\n", "\n", "This has some interesting uses, such as sampling models defined in PyMC with pure JAX samplers, like those implemented in [NumPyro](https://num.pyro.ai/en/latest/index.html) or [BlackJax](https://github.com/blackjax-devs/blackjax). \n", "\n", - "This notebook illustrates how we can implement a new Aesara {class}`~aesara.graph.op.Op` that wraps a JAX function. \n", + "This notebook illustrates how we can implement a new PyTensor {class}`~pytensor.graph.op.Op` that wraps a JAX function. \n", "\n", "### Outline\n", "\n", - "1. We start in a similar path as that taken in the {ref}`blackbox_external_likelihood_numpy`, which wraps a NumPy function in a Aesara {class}`~aesara.graph.op.Op`, this time wrapping a JAX jitted function instead. \n", - "2. We then enable Aesara to \"unwrap\" the just wrapped JAX function, so that the whole graph can be compiled to JAX. We make use of this to sample our PyMC model via the JAX NumPyro NUTS sampler." + "1. We start in a similar path as that taken in the {ref}`blackbox_external_likelihood_numpy`, which wraps a NumPy function in a PyTensor {class}`~pytensor.graph.op.Op`, this time wrapping a JAX jitted function instead. \n", + "2. We then enable PyTensor to \"unwrap\" the just wrapped JAX function, so that the whole graph can be compiled to JAX. We make use of this to sample our PyMC model via the JAX NumPyro NUTS sampler." ] }, { @@ -534,7 +534,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's print out the gradient with respect to `emission_signal`. We will check this value is unchanged after we wrap our function in Aesara." + "Let's print out the gradient with respect to `emission_signal`. We will check this value is unchanged after we wrap our function in PyTensor." 
] }, { @@ -567,21 +567,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Wrapping the JAX function in Aesara" + "## Wrapping the JAX function in PyTensor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we are ready to wrap our JAX jitted function in a Aesara {class}`~aesara.graph.op.Op`, that we can then use in our PyMC models. We recommend you check Aesara's official {ref}`Op documentation ` if you want to understand it in more detail.\n", + "Now we are ready to wrap our JAX jitted function in a PyTensor {class}`~pytensor.graph.op.Op`, that we can then use in our PyMC models. We recommend you check PyTensor's official {ref}`Op documentation ` if you want to understand it in more detail.\n", "\n", - "In brief, we will inherit from {class}`~aesara.graph.op.Op` and define the following methods:\n", - "1. `make_node`: Creates an {class}`~aesara.graph.basic.Apply` node that holds together the symbolic inputs and outputs of our operation\n", + "In brief, we will inherit from {class}`~pytensor.graph.op.Op` and define the following methods:\n", + "1. `make_node`: Creates an {class}`~pytensor.graph.basic.Apply` node that holds together the symbolic inputs and outputs of our operation\n", "2. `perform`: Python code that returns the evaluation of our operation, given concrete input values\n", - "3. `grad`: Returns a Aesara symbolic graph that represents the gradient expression of an output cost wrt to its inputs\n", + "3. `grad`: Returns a PyTensor symbolic graph that represents the gradient expression of an output cost wrt to its inputs\n", "\n", - "For the `grad` we will create a second {class}`~aesara.graph.op.Op` that wraps our jitted grad version from above" + "For the `grad` we will create a second {class}`~pytensor.graph.op.Op` that wraps our jitted grad version from above" ] }, { @@ -613,7 +613,7 @@ "\n", " def perform(self, node, inputs, outputs):\n", " result = jitted_vec_hmm_logp(*inputs)\n", - " # Aesara raises an error if the dtype of the returned output is not\n", + " # PyTensor raises an error if the dtype of the returned output is not\n", " # exactly the one expected from the Apply node (in this case\n", " # `dscalar`, which stands for float64 scalar), so we make sure\n", " # to convert to the expected dtype. To avoid unnecessary conversions\n", @@ -630,7 +630,7 @@ " grad_wrt_logp_transition,\n", " ) = hmm_logp_grad_op(*inputs)\n", " # If there are inputs for which the gradients will never be needed or cannot\n", - " # be computed, `aesara.gradient.grad_not_implemented` should be used as the\n", + " # be computed, `pytensor.gradient.grad_not_implemented` should be used as the\n", " # output gradient for that input.\n", " output_gradient = output_gradients[0]\n", " return [\n", @@ -752,7 +752,7 @@ } }, "source": [ - "It's also useful to check the gradient of our {class}`~aesara.graph.op.Op` can be requested via the Aesara `grad` interface:" + "It's also useful to check the gradient of our {class}`~pytensor.graph.op.Op` can be requested via the PyTensor `grad` interface:" ] }, { @@ -1026,8 +1026,8 @@ "INFO:pymc:Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", "INFO:pymc:Initializing NUTS using jitter+adapt_diag...\n", - "/home/ricardo/Documents/Projects/pymc/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. 
You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/home/ricardo/Documents/Projects/pymc/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Sequential sampling (2 chains in 1 job)\n", "INFO:pymc:Sequential sampling (2 chains in 1 job)\n", "NUTS: [emission_signal, emission_noise, p_initial_state, p_transition]\n", @@ -1199,7 +1199,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As mentioned in the beginning, Aesara can compile an entire graph to JAX. To do this, it needs to know how each {class}`~aesara.graph.op.Op` in the graph can be converted to a JAX function. This can be done by {term}`dispatch ` with {func}`aesara.link.jax.dispatch.jax_funcify`. Most of the default Aesara {class}`~aesara.graph.op.Op`s already have such a dispatch function, but we will need to add a new one for our custom `HMMLogpOp`, as Aesara has never seen that before.\n", + "As mentioned in the beginning, PyTensor can compile an entire graph to JAX. To do this, it needs to know how each {class}`~pytensor.graph.op.Op` in the graph can be converted to a JAX function. This can be done by {term}`dispatch ` with {func}`pytensor.link.jax.dispatch.jax_funcify`. Most of the default PyTensor {class}`~pytensor.graph.op.Op`s already have such a dispatch function, but we will need to add a new one for our custom `HMMLogpOp`, as PyTensor has never seen that before.\n", "\n", "For that we need a function which returns (another) JAX function, that performs the same computation as in our `perform` method. Fortunately, we started exactly with such function, so this amounts to 3 short lines of code." 
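A minimal sketch of such a dispatch function, reusing the non-jitted `vec_hmm_logp` from above (the decorated function's name is arbitrary):

```python
from pytensor.link.jax.dispatch import jax_funcify


@jax_funcify.register(HMMLogpOp)
def hmm_logp_dispatch(op, **kwargs):
    # Return the plain JAX function; the whole PyTensor graph gets jitted
    # together after conversion, so we do not jit it here.
    return vec_hmm_logp
```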
] @@ -1220,12 +1220,12 @@ "metadata": {}, "source": [ ":::{note}\n", - "We do not return the jitted function, so that the entire Aesara graph can be jitted together after being converted to JAX.\n", + "We do not return the jitted function, so that the entire PyTensor graph can be jitted together after being converted to JAX.\n", ":::\n", "\n", - "For a better understanding of {class}`~aesara.graph.op.Op` JAX conversions, we recommend reading Aesara's {doc}`Adding JAX and Numba support for Ops guide `.\n", + "For a better understanding of {class}`~pytensor.graph.op.Op` JAX conversions, we recommend reading PyTensor's {doc}`Adding JAX and Numba support for Ops guide `.\n", "\n", - "We can test that our conversion function is working properly by compiling a {func}`aesara.function` with `mode=\"JAX\"`:" + "We can test that our conversion function is working properly by compiling a {func}`pytensor.function` with `mode=\"JAX\"`:" ] }, { @@ -1252,7 +1252,7 @@ " logp_initial_state_true,\n", " logp_transition_true,\n", ")\n", - "jax_fn = aesara.function(inputs=[], outputs=out, mode=\"JAX\")\n", + "jax_fn = pytensor.function(inputs=[], outputs=out, mode=\"JAX\")\n", "jax_fn()" ] }, @@ -1292,9 +1292,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that we could have added an equally simple function to convert our `HMMLogpGradOp`, in case we wanted to convert Aesara gradient graphs to JAX. In our case, we don't need to do this because we will rely on JAX `grad` function (or more precisely, NumPyro will rely on it) to obtain these again from our compiled JAX function.\n", + "Note that we could have added an equally simple function to convert our `HMMLogpGradOp`, in case we wanted to convert PyTensor gradient graphs to JAX. In our case, we don't need to do this because we will rely on JAX `grad` function (or more precisely, NumPyro will rely on it) to obtain these again from our compiled JAX function.\n", "\n", - "We include a {ref}`short discussion ` at the end of this document, to help you better understand the trade-offs between working with Aesara graphs vs JAX functions, and when you might want to use one or the other." + "We include a {ref}`short discussion ` at the end of this document, to help you better understand the trade-offs between working with PyTensor graphs vs JAX functions, and when you might want to use one or the other." ] }, { @@ -1322,8 +1322,8 @@ "text": [ "/home/ricardo/miniconda3/envs/pymc-examples/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "/home/ricardo/Documents/Projects/pymc/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n" + "/home/ricardo/Documents/Projects/pymc/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. 
You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n" ] }, { @@ -1399,8 +1399,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(aesara_vs_jax)=\n", - "## Some brief notes on using Aesara vs JAX" + "(pytensor_vs_jax)=\n", + "## Some brief notes on using PyTensor vs JAX" ] }, { @@ -1414,7 +1414,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we have seen, it is pretty straightforward to interface between Aesara graphs and JAX functions. \n", + "As we have seen, it is pretty straightforward to interface between PyTensor graphs and JAX functions. \n", "\n", "This can be very handy when you want to combine previously implemented JAX function with PyMC models. We used a marginalized HMM log-likelihood in this example, but the same strategy could be used to do Bayesian inference with Deep Neural Networks or Differential Equations, or pretty much any other functions implemented in JAX that can be used in the context of a Bayesian model.\n", "\n", @@ -1432,19 +1432,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Like JAX, Aesara has the goal of mimicking the NumPy and Scipy APIs, so that writing code in Aesara should feel very similar to how code is written in those libraries.\n", + "Like JAX, PyTensor has the goal of mimicking the NumPy and Scipy APIs, so that writing code in PyTensor should feel very similar to how code is written in those libraries.\n", "\n", - "There are, however, some of advantages to working with Aesara:\n", + "There are, however, some of advantages to working with PyTensor:\n", "\n", - "1. Aesara graphs are considerably easier to {ref}`inspect and debug ` than JAX functions\n", - "2. Aesara has clever {ref}`optimization and stabilization routines ` that are not possible or implemented in JAX\n", - "3. Aesara graphs can be easily {ref}`manipulated after creation `\n", + "1. PyTensor graphs are considerably easier to {ref}`inspect and debug ` than JAX functions\n", + "2. PyTensor has clever {ref}`optimization and stabilization routines ` that are not possible or implemented in JAX\n", + "3. PyTensor graphs can be easily {ref}`manipulated after creation `\n", "\n", - "Point 2 means your graphs are likely to perform better if written in Aesara. In general you don't have to worry about using specialized functions like `log1p` or `logsumexp`, as Aesara will be able to detect the equivalent naive expressions and replace them by their specialized counterparts. Importantly, you still benefit from these optimizations when your graph is later compiled to JAX.\n", + "Point 2 means your graphs are likely to perform better if written in PyTensor. In general you don't have to worry about using specialized functions like `log1p` or `logsumexp`, as PyTensor will be able to detect the equivalent naive expressions and replace them by their specialized counterparts. Importantly, you still benefit from these optimizations when your graph is later compiled to JAX.\n", "\n", - "The catch is that Aesara cannot reason about JAX functions, and by association {class}`~aesara.graph.op.Op`s that wrap them. 
This means that the larger the portion of the graph is \"hidden\" inside a JAX function, the less a user will benefit from Aesara's rewrite and debugging abilities.\n", + "The catch is that PyTensor cannot reason about JAX functions, and by association {class}`~pytensor.graph.op.Op`s that wrap them. This means that the larger the portion of the graph is \"hidden\" inside a JAX function, the less a user will benefit from PyTensor's rewrite and debugging abilities.\n", "\n", - "Point 3 is more important for library developers. It is the main reason why PyMC developers opted to use Aesara (and before that, its predecessor Theano) as its backend. Many of the user-facing utilities provided by PyMC rely on the ability to easily parse and manipulate Aesara graphs." + "Point 3 is more important for library developers. It is the main reason why PyMC developers opted to use PyTensor (and before that, its predecessor Theano) as its backend. Many of the user-facing utilities provided by PyMC rely on the ability to easily parse and manipulate PyTensor graphs." ] }, { @@ -1458,11 +1458,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We had to create two {class}`~aesara.graph.op.Op`s, one for the function we cared about and a separate one for its gradients. However, JAX provides a `value_and_grad` utility that can return both the value of a function and its gradients. We can do something similar and get away with a single {class}`~aesara.graph.op.Op` if we are clever about it.\n", + "We had to create two {class}`~pytensor.graph.op.Op`s, one for the function we cared about and a separate one for its gradients. However, JAX provides a `value_and_grad` utility that can return both the value of a function and its gradients. We can do something similar and get away with a single {class}`~pytensor.graph.op.Op` if we are clever about it.\n", "\n", "By doing this we can (potentially) save memory and reuse computation that is shared between the function and its gradients. This may be relevant when working with very large JAX functions.\n", "\n", - "Note that this is only useful if you are interested in taking gradients with respect to your {class}`~aesara.graph.op.Op` using Aesara. If your endgoal is to compile your graph to JAX, and only then take the gradients (as NumPyro does), then it's better to use the first approach. You don't even need to implement the `grad` method and associated {class}`~aesara.graph.op.Op` in that case." + "Note that this is only useful if you are interested in taking gradients with respect to your {class}`~pytensor.graph.op.Op` using PyTensor. If your endgoal is to compile your graph to JAX, and only then take the gradients (as NumPyro does), then it's better to use the first approach. You don't even need to implement the `grad` method and associated {class}`~pytensor.graph.op.Op` in that case." ] }, { @@ -1511,7 +1511,7 @@ " # the gradient outputs! 
That would require computing the second order\n", " # gradients\n", " assert all(\n", - " isinstance(g.type, aesara.gradient.DisconnectedType) for g in output_gradients[1:]\n", + " isinstance(g.type, pytensor.gradient.DisconnectedType) for g in output_gradients[1:]\n", " )\n", "\n", " return [output_gradients[0] * grad for grad in gradients]\n", @@ -1524,7 +1524,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We check again that we can take the gradient using Aesara `grad` interface" + "We check again that we can take the gradient using PyTensor `grad` interface" ] }, { @@ -1592,13 +1592,13 @@ "Python version : 3.10.2\n", "IPython version : 8.1.1\n", "\n", - "aesara: 2.5.1\n", + "pytensor: 2.5.1\n", "aeppl : 0.0.27\n", "xarray: 2022.3.0\n", "\n", "matplotlib: 3.5.1\n", "jax : 0.3.4\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "arviz : 0.12.0\n", "pymc : 4.0.0b6\n", "numpy : 1.22.3\n", @@ -1610,7 +1610,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/case_studies/wrapping_jax_function.myst.md b/examples/case_studies/wrapping_jax_function.myst.md index 49c25fc9b..8b9147ded 100644 --- a/examples/case_studies/wrapping_jax_function.myst.md +++ b/examples/case_studies/wrapping_jax_function.myst.md @@ -16,20 +16,20 @@ substitutions: # How to wrap a JAX function for use in PyMC :::{post} Mar 24, 2022 -:tags: Aesara, hidden markov model, JAX +:tags: PyTensor, hidden markov model, JAX :category: advanced, how-to :author: Ricardo Vieira ::: ```{code-cell} ipython3 -import aesara -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor +import pytensor.tensor as at -from aesara.graph import Apply, Op +from pytensor.graph import Apply, Op ``` ```{code-cell} ipython3 @@ -47,25 +47,25 @@ import jax.numpy as jnp import jax.scipy as jsp import pymc.sampling_jax -from aesara.link.jax.dispatch import jax_funcify +from pytensor.link.jax.dispatch import jax_funcify ``` -## Intro: Aesara and its backends +## Intro: PyTensor and its backends -PyMC uses the {doc}`Aesara ` library to create and manipulate probabilistic graphs. Aesara is backend-agnostic, meaning it can make use of functions written in different languages or frameworks, including pure Python, NumPy, C, Cython, Numba, and [JAX](https://jax.readthedocs.io/en/latest/index.html). +PyMC uses the {doc}`PyTensor ` library to create and manipulate probabilistic graphs. PyTensor is backend-agnostic, meaning it can make use of functions written in different languages or frameworks, including pure Python, NumPy, C, Cython, Numba, and [JAX](https://jax.readthedocs.io/en/latest/index.html). -All that is needed is to encapsulate such function in a Aesara {class}`~aesara.graph.op.Op`, which enforces a specific API regarding how inputs and outputs of pure "operations" should be handled. It also implements methods for optional extra functionality like symbolic shape inference and automatic differentiation. This is well covered in the Aesara {ref}`Op documentation ` and in our {ref}`blackbox_external_likelihood_numpy` pymc-example. +All that is needed is to encapsulate such function in a PyTensor {class}`~pytensor.graph.op.Op`, which enforces a specific API regarding how inputs and outputs of pure "operations" should be handled. 
It also implements methods for optional extra functionality like symbolic shape inference and automatic differentiation. This is well covered in the PyTensor {ref}`Op documentation ` and in our {ref}`blackbox_external_likelihood_numpy` pymc-example. -More recently, Aesara became capable of compiling directly to some of these languages/frameworks, meaning that we can convert a complete Aesara graph into a JAX or NUMBA jitted function, whereas traditionally they could only be converted to Python or C. +More recently, PyTensor became capable of compiling directly to some of these languages/frameworks, meaning that we can convert a complete PyTensor graph into a JAX or NUMBA jitted function, whereas traditionally they could only be converted to Python or C. This has some interesting uses, such as sampling models defined in PyMC with pure JAX samplers, like those implemented in [NumPyro](https://num.pyro.ai/en/latest/index.html) or [BlackJax](https://github.com/blackjax-devs/blackjax). -This notebook illustrates how we can implement a new Aesara {class}`~aesara.graph.op.Op` that wraps a JAX function. +This notebook illustrates how we can implement a new PyTensor {class}`~pytensor.graph.op.Op` that wraps a JAX function. ### Outline -1. We start in a similar path as that taken in the {ref}`blackbox_external_likelihood_numpy`, which wraps a NumPy function in a Aesara {class}`~aesara.graph.op.Op`, this time wrapping a JAX jitted function instead. -2. We then enable Aesara to "unwrap" the just wrapped JAX function, so that the whole graph can be compiled to JAX. We make use of this to sample our PyMC model via the JAX NumPyro NUTS sampler. +1. We start in a similar path as that taken in the {ref}`blackbox_external_likelihood_numpy`, which wraps a NumPy function in a PyTensor {class}`~pytensor.graph.op.Op`, this time wrapping a JAX jitted function instead. +2. We then enable PyTensor to "unwrap" the just wrapped JAX function, so that the whole graph can be compiled to JAX. We make use of this to sample our PyMC model via the JAX NumPyro NUTS sampler. +++ @@ -310,7 +310,7 @@ We will also ask JAX to give us the function of the gradients with respect to ea jitted_vec_hmm_logp_grad = jax.jit(jax.grad(vec_hmm_logp, argnums=list(range(5)))) ``` -Let's print out the gradient with respect to `emission_signal`. We will check this value is unchanged after we wrap our function in Aesara. +Let's print out the gradient with respect to `emission_signal`. We will check this value is unchanged after we wrap our function in PyTensor. ```{code-cell} ipython3 jitted_vec_hmm_logp_grad( @@ -322,18 +322,18 @@ jitted_vec_hmm_logp_grad( )[1] ``` -## Wrapping the JAX function in Aesara +## Wrapping the JAX function in PyTensor +++ -Now we are ready to wrap our JAX jitted function in a Aesara {class}`~aesara.graph.op.Op`, that we can then use in our PyMC models. We recommend you check Aesara's official {ref}`Op documentation ` if you want to understand it in more detail. +Now we are ready to wrap our JAX jitted function in a PyTensor {class}`~pytensor.graph.op.Op`, that we can then use in our PyMC models. We recommend you check PyTensor's official {ref}`Op documentation ` if you want to understand it in more detail. -In brief, we will inherit from {class}`~aesara.graph.op.Op` and define the following methods: -1. 
`make_node`: Creates an {class}`~aesara.graph.basic.Apply` node that holds together the symbolic inputs and outputs of our operation +In brief, we will inherit from {class}`~pytensor.graph.op.Op` and define the following methods: +1. `make_node`: Creates an {class}`~pytensor.graph.basic.Apply` node that holds together the symbolic inputs and outputs of our operation 2. `perform`: Python code that returns the evaluation of our operation, given concrete input values -3. `grad`: Returns a Aesara symbolic graph that represents the gradient expression of an output cost wrt to its inputs +3. `grad`: Returns a PyTensor symbolic graph that represents the gradient expression of an output cost wrt to its inputs -For the `grad` we will create a second {class}`~aesara.graph.op.Op` that wraps our jitted grad version from above +For the `grad` we will create a second {class}`~pytensor.graph.op.Op` that wraps our jitted grad version from above ```{code-cell} ipython3 class HMMLogpOp(Op): @@ -359,7 +359,7 @@ class HMMLogpOp(Op): def perform(self, node, inputs, outputs): result = jitted_vec_hmm_logp(*inputs) - # Aesara raises an error if the dtype of the returned output is not + # PyTensor raises an error if the dtype of the returned output is not # exactly the one expected from the Apply node (in this case # `dscalar`, which stands for float64 scalar), so we make sure # to convert to the expected dtype. To avoid unnecessary conversions @@ -376,7 +376,7 @@ class HMMLogpOp(Op): grad_wrt_logp_transition, ) = hmm_logp_grad_op(*inputs) # If there are inputs for which the gradients will never be needed or cannot - # be computed, `aesara.gradient.grad_not_implemented` should be used as the + # be computed, `pytensor.gradient.grad_not_implemented` should be used as the # output gradient for that input. output_gradient = output_gradients[0] return [ @@ -455,7 +455,7 @@ hmm_logp_grad_op( +++ {"pycharm": {"name": "#%% md\n"}} -It's also useful to check the gradient of our {class}`~aesara.graph.op.Op` can be requested via the Aesara `grad` interface: +It's also useful to check the gradient of our {class}`~pytensor.graph.op.Op` can be requested via the PyTensor `grad` interface: ```{code-cell} ipython3 # We define the symbolic `emission_signal` variable outside of the `Op` @@ -570,7 +570,7 @@ The posteriors look reasonably centered around the true values used to generate +++ -As mentioned in the beginning, Aesara can compile an entire graph to JAX. To do this, it needs to know how each {class}`~aesara.graph.op.Op` in the graph can be converted to a JAX function. This can be done by {term}`dispatch ` with {func}`aesara.link.jax.dispatch.jax_funcify`. Most of the default Aesara {class}`~aesara.graph.op.Op`s already have such a dispatch function, but we will need to add a new one for our custom `HMMLogpOp`, as Aesara has never seen that before. +As mentioned in the beginning, PyTensor can compile an entire graph to JAX. To do this, it needs to know how each {class}`~pytensor.graph.op.Op` in the graph can be converted to a JAX function. This can be done by {term}`dispatch ` with {func}`pytensor.link.jax.dispatch.jax_funcify`. Most of the default PyTensor {class}`~pytensor.graph.op.Op`s already have such a dispatch function, but we will need to add a new one for our custom `HMMLogpOp`, as PyTensor has never seen that before. For that we need a function which returns (another) JAX function, that performs the same computation as in our `perform` method. 
Fortunately, we started exactly with such function, so this amounts to 3 short lines of code. @@ -581,12 +581,12 @@ def hmm_logp_dispatch(op, **kwargs): ``` :::{note} -We do not return the jitted function, so that the entire Aesara graph can be jitted together after being converted to JAX. +We do not return the jitted function, so that the entire PyTensor graph can be jitted together after being converted to JAX. ::: -For a better understanding of {class}`~aesara.graph.op.Op` JAX conversions, we recommend reading Aesara's {doc}`Adding JAX and Numba support for Ops guide `. +For a better understanding of {class}`~pytensor.graph.op.Op` JAX conversions, we recommend reading PyTensor's {doc}`Adding JAX and Numba support for Ops guide `. -We can test that our conversion function is working properly by compiling a {func}`aesara.function` with `mode="JAX"`: +We can test that our conversion function is working properly by compiling a {func}`pytensor.function` with `mode="JAX"`: ```{code-cell} ipython3 out = hmm_logp_op( @@ -596,7 +596,7 @@ out = hmm_logp_op( logp_initial_state_true, logp_transition_true, ) -jax_fn = aesara.function(inputs=[], outputs=out, mode="JAX") +jax_fn = pytensor.function(inputs=[], outputs=out, mode="JAX") jax_fn() ``` @@ -607,9 +607,9 @@ model_logp_jax_fn = model.compile_fn(model.logpt(sum=False), mode="JAX") model_logp_jax_fn(initial_point) ``` -Note that we could have added an equally simple function to convert our `HMMLogpGradOp`, in case we wanted to convert Aesara gradient graphs to JAX. In our case, we don't need to do this because we will rely on JAX `grad` function (or more precisely, NumPyro will rely on it) to obtain these again from our compiled JAX function. +Note that we could have added an equally simple function to convert our `HMMLogpGradOp`, in case we wanted to convert PyTensor gradient graphs to JAX. In our case, we don't need to do this because we will rely on JAX `grad` function (or more precisely, NumPyro will rely on it) to obtain these again from our compiled JAX function. -We include a {ref}`short discussion ` at the end of this document, to help you better understand the trade-offs between working with Aesara graphs vs JAX functions, and when you might want to use one or the other. +We include a {ref}`short discussion ` at the end of this document, to help you better understand the trade-offs between working with PyTensor graphs vs JAX functions, and when you might want to use one or the other. +++ @@ -638,8 +638,8 @@ Depending on the model and computer architecture you are using, a pure JAX sampl +++ -(aesara_vs_jax)= -## Some brief notes on using Aesara vs JAX +(pytensor_vs_jax)= +## Some brief notes on using PyTensor vs JAX +++ @@ -647,7 +647,7 @@ Depending on the model and computer architecture you are using, a pure JAX sampl +++ -As we have seen, it is pretty straightforward to interface between Aesara graphs and JAX functions. +As we have seen, it is pretty straightforward to interface between PyTensor graphs and JAX functions. This can be very handy when you want to combine previously implemented JAX function with PyMC models. We used a marginalized HMM log-likelihood in this example, but the same strategy could be used to do Bayesian inference with Deep Neural Networks or Differential Equations, or pretty much any other functions implemented in JAX that can be used in the context of a Bayesian model. 
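As a concrete distillation of that strategy, the following is a minimal, self-contained sketch (an editorial illustration, not code from the notebook or from this patch) that wraps a toy JAX log-likelihood in a PyTensor `Op`. The `normal_logp` function and the `NormalLogpOp` class are made-up names, but the `Op`/`Apply` pattern is the same one the notebook walks through.

```python
import jax
import jax.numpy as jnp
import numpy as np
import pytensor.tensor as at
from pytensor.graph import Apply, Op


def normal_logp(mu, data):
    # toy iid normal log-likelihood, written with jax.numpy
    return jnp.sum(-0.5 * jnp.log(2 * jnp.pi) - 0.5 * (data - mu) ** 2)


jitted_normal_logp = jax.jit(normal_logp)


class NormalLogpOp(Op):
    def make_node(self, mu, data):
        inputs = [at.as_tensor_variable(mu), at.as_tensor_variable(data)]
        # a single float64 scalar output holds the summed log-likelihood
        outputs = [at.dscalar()]
        return Apply(self, inputs, outputs)

    def perform(self, node, inputs, outputs):
        result = jitted_normal_logp(*inputs)
        # cast to the dtype promised by the Apply node above
        outputs[0][0] = np.asarray(result, dtype=node.outputs[0].dtype)


normal_logp_op = NormalLogpOp()
print(normal_logp_op(0.0, np.array([-0.5, 0.1, 0.7])).eval())
```

A `grad` method and a JAX dispatch function could be layered on top of this skeleton exactly as done for `HMMLogpOp` above.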
@@ -659,19 +659,19 @@ It can also be worth it, if you need to make use of JAX's unique features like v +++ -Like JAX, Aesara has the goal of mimicking the NumPy and Scipy APIs, so that writing code in Aesara should feel very similar to how code is written in those libraries. +Like JAX, PyTensor has the goal of mimicking the NumPy and Scipy APIs, so that writing code in PyTensor should feel very similar to how code is written in those libraries. -There are, however, some of advantages to working with Aesara: +There are, however, some of advantages to working with PyTensor: -1. Aesara graphs are considerably easier to {ref}`inspect and debug ` than JAX functions -2. Aesara has clever {ref}`optimization and stabilization routines ` that are not possible or implemented in JAX -3. Aesara graphs can be easily {ref}`manipulated after creation ` +1. PyTensor graphs are considerably easier to {ref}`inspect and debug ` than JAX functions +2. PyTensor has clever {ref}`optimization and stabilization routines ` that are not possible or implemented in JAX +3. PyTensor graphs can be easily {ref}`manipulated after creation ` -Point 2 means your graphs are likely to perform better if written in Aesara. In general you don't have to worry about using specialized functions like `log1p` or `logsumexp`, as Aesara will be able to detect the equivalent naive expressions and replace them by their specialized counterparts. Importantly, you still benefit from these optimizations when your graph is later compiled to JAX. +Point 2 means your graphs are likely to perform better if written in PyTensor. In general you don't have to worry about using specialized functions like `log1p` or `logsumexp`, as PyTensor will be able to detect the equivalent naive expressions and replace them by their specialized counterparts. Importantly, you still benefit from these optimizations when your graph is later compiled to JAX. -The catch is that Aesara cannot reason about JAX functions, and by association {class}`~aesara.graph.op.Op`s that wrap them. This means that the larger the portion of the graph is "hidden" inside a JAX function, the less a user will benefit from Aesara's rewrite and debugging abilities. +The catch is that PyTensor cannot reason about JAX functions, and by association {class}`~pytensor.graph.op.Op`s that wrap them. This means that the larger the portion of the graph is "hidden" inside a JAX function, the less a user will benefit from PyTensor's rewrite and debugging abilities. -Point 3 is more important for library developers. It is the main reason why PyMC developers opted to use Aesara (and before that, its predecessor Theano) as its backend. Many of the user-facing utilities provided by PyMC rely on the ability to easily parse and manipulate Aesara graphs. +Point 3 is more important for library developers. It is the main reason why PyMC developers opted to use PyTensor (and before that, its predecessor Theano) as its backend. Many of the user-facing utilities provided by PyMC rely on the ability to easily parse and manipulate PyTensor graphs. +++ @@ -679,11 +679,11 @@ Point 3 is more important for library developers. It is the main reason why PyMC +++ -We had to create two {class}`~aesara.graph.op.Op`s, one for the function we cared about and a separate one for its gradients. However, JAX provides a `value_and_grad` utility that can return both the value of a function and its gradients. We can do something similar and get away with a single {class}`~aesara.graph.op.Op` if we are clever about it. 
+We had to create two {class}`~pytensor.graph.op.Op`s, one for the function we cared about and a separate one for its gradients. However, JAX provides a `value_and_grad` utility that can return both the value of a function and its gradients. We can do something similar and get away with a single {class}`~pytensor.graph.op.Op` if we are clever about it. By doing this we can (potentially) save memory and reuse computation that is shared between the function and its gradients. This may be relevant when working with very large JAX functions. -Note that this is only useful if you are interested in taking gradients with respect to your {class}`~aesara.graph.op.Op` using Aesara. If your endgoal is to compile your graph to JAX, and only then take the gradients (as NumPyro does), then it's better to use the first approach. You don't even need to implement the `grad` method and associated {class}`~aesara.graph.op.Op` in that case. +Note that this is only useful if you are interested in taking gradients with respect to your {class}`~pytensor.graph.op.Op` using PyTensor. If your endgoal is to compile your graph to JAX, and only then take the gradients (as NumPyro does), then it's better to use the first approach. You don't even need to implement the `grad` method and associated {class}`~pytensor.graph.op.Op` in that case. ```{code-cell} ipython3 --- @@ -723,7 +723,7 @@ class HmmLogpValueGradOp(Op): # the gradient outputs! That would require computing the second order # gradients assert all( - isinstance(g.type, aesara.gradient.DisconnectedType) for g in output_gradients[1:] + isinstance(g.type, pytensor.gradient.DisconnectedType) for g in output_gradients[1:] ) return [output_gradients[0] * grad for grad in gradients] @@ -732,7 +732,7 @@ class HmmLogpValueGradOp(Op): hmm_logp_value_grad_op = HmmLogpValueGradOp() ``` -We check again that we can take the gradient using Aesara `grad` interface +We check again that we can take the gradient using PyTensor `grad` interface ```{code-cell} ipython3 emission_signal_variable = at.as_tensor_variable(emission_signal_true) @@ -759,7 +759,7 @@ Authored by [Ricardo Vieira](https://github.com/ricardoV94/) in March 24, 2022 ( ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/causal_inference/difference_in_differences.ipynb b/examples/causal_inference/difference_in_differences.ipynb index 38b3531a0..a23b52e10 100644 --- a/examples/causal_inference/difference_in_differences.ipynb +++ b/examples/causal_inference/difference_in_differences.ipynb @@ -1202,7 +1202,7 @@ "Python version : 3.10.6\n", "IPython version : 8.5.0\n", "\n", - "aesara: 2.8.2\n", + "pytensor: 2.8.2\n", "aeppl : 0.0.35\n", "xarray: 2022.6.0\n", "\n", @@ -1220,7 +1220,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/causal_inference/difference_in_differences.myst.md b/examples/causal_inference/difference_in_differences.myst.md index b6a18029e..f59b5e36c 100644 --- a/examples/causal_inference/difference_in_differences.myst.md +++ b/examples/causal_inference/difference_in_differences.myst.md @@ -444,7 +444,7 @@ Of course, when using the difference in differences approach for real applicatio ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w 
-p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/causal_inference/excess_deaths.ipynb b/examples/causal_inference/excess_deaths.ipynb index 426165749..787cb791c 100644 --- a/examples/causal_inference/excess_deaths.ipynb +++ b/examples/causal_inference/excess_deaths.ipynb @@ -82,13 +82,13 @@ "import calendar\n", "import os\n", "\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.dates as mdates\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import seaborn as sns\n", "import xarray as xr" ] @@ -1388,13 +1388,13 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.7.5\n", + "pytensor: 2.7.5\n", "aeppl : 0.0.32\n", "xarray: 2022.3.0\n", "\n", "matplotlib: 3.5.2\n", "numpy : 1.23.0\n", - "aesara : 2.7.5\n", + "pytensor : 2.7.5\n", "xarray : 2022.3.0\n", "pandas : 1.4.3\n", "seaborn : 0.11.2\n", @@ -1408,7 +1408,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/causal_inference/excess_deaths.myst.md b/examples/causal_inference/excess_deaths.myst.md index e69f0c821..c9e60f012 100644 --- a/examples/causal_inference/excess_deaths.myst.md +++ b/examples/causal_inference/excess_deaths.myst.md @@ -66,13 +66,13 @@ Finally, we are _not_ claiming that $x$ people died directly from the COVID-19 v import calendar import os -import aesara.tensor as at import arviz as az import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at import seaborn as sns import xarray as xr ``` @@ -495,7 +495,7 @@ The bad news of course, is that as of the last data point (May 2022) the number ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/causal_inference/interrupted_time_series.ipynb b/examples/causal_inference/interrupted_time_series.ipynb index aca24ebd7..8f93acd6c 100644 --- a/examples/causal_inference/interrupted_time_series.ipynb +++ b/examples/causal_inference/interrupted_time_series.ipynb @@ -1074,7 +1074,7 @@ "Python version : 3.10.6\n", "IPython version : 8.5.0\n", "\n", - "aesara: 2.8.2\n", + "pytensor: 2.8.2\n", "aeppl : 0.0.35\n", "xarray: 2022.6.0\n", "\n", @@ -1092,7 +1092,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/causal_inference/interrupted_time_series.myst.md b/examples/causal_inference/interrupted_time_series.myst.md index dcac83139..167c03a30 100644 --- a/examples/causal_inference/interrupted_time_series.myst.md +++ b/examples/causal_inference/interrupted_time_series.myst.md @@ -355,7 +355,7 @@ There are of course many ways that the interrupted time series approach could be ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/causal_inference/regression_discontinuity.ipynb b/examples/causal_inference/regression_discontinuity.ipynb index 8b7acbb4f..1e9a52747 100644 --- a/examples/causal_inference/regression_discontinuity.ipynb +++ 
b/examples/causal_inference/regression_discontinuity.ipynb @@ -446,8 +446,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [sigma, effect]\n" ] @@ -814,7 +814,7 @@ "Python version : 3.9.12\n", "IPython version : 8.2.0\n", "\n", - "aesara: 2.5.1\n", + "pytensor: 2.5.1\n", "aeppl : 0.0.27\n", "xarray: 0.20.1\n", "\n", @@ -831,7 +831,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/causal_inference/regression_discontinuity.myst.md b/examples/causal_inference/regression_discontinuity.myst.md index e4bf30d96..7667dfee6 100644 --- a/examples/causal_inference/regression_discontinuity.myst.md +++ b/examples/causal_inference/regression_discontinuity.myst.md @@ -236,7 +236,7 @@ In this notebook we have merely touched the surface of how to analyse data from ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/diagnostics_and_criticism/sampler-stats.ipynb b/examples/diagnostics_and_criticism/sampler-stats.ipynb index 2839cd9c6..c0f3091c8 100644 --- a/examples/diagnostics_and_criticism/sampler-stats.ipynb +++ b/examples/diagnostics_and_criticism/sampler-stats.ipynb @@ -23,7 +23,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] }, { diff --git a/examples/gaussian_processes/GP-Kron.ipynb b/examples/gaussian_processes/GP-Kron.ipynb index 8fa92a24c..6b094fefb 100644 --- a/examples/gaussian_processes/GP-Kron.ipynb +++ b/examples/gaussian_processes/GP-Kron.ipynb @@ -779,7 +779,7 @@ "Python version : 3.9.12\n", "IPython version : 8.3.0\n", "\n", - "aesara: 2.8.6\n", + "pytensor: 2.8.6\n", "aeppl : 0.0.36\n", "xarray: 2022.3.0\n", "\n", @@ -795,7 +795,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/gaussian_processes/GP-Kron.myst.md 
b/examples/gaussian_processes/GP-Kron.myst.md index b20d44d80..893f61f6d 100644 --- a/examples/gaussian_processes/GP-Kron.myst.md +++ b/examples/gaussian_processes/GP-Kron.myst.md @@ -298,7 +298,7 @@ for i, ax in enumerate(axs): ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/gaussian_processes/GP-Latent.ipynb b/examples/gaussian_processes/GP-Latent.ipynb index a5af86314..299bea137 100644 --- a/examples/gaussian_processes/GP-Latent.ipynb +++ b/examples/gaussian_processes/GP-Latent.ipynb @@ -1475,7 +1475,7 @@ "Python version : 3.10.6\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.8.2\n", + "pytensor: 2.8.2\n", "aeppl : 0.0.35\n", "xarray: 2022.6.0\n", "\n", @@ -1492,7 +1492,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/gaussian_processes/GP-Latent.myst.md b/examples/gaussian_processes/GP-Latent.myst.md index 108954a9f..ff720902c 100644 --- a/examples/gaussian_processes/GP-Latent.myst.md +++ b/examples/gaussian_processes/GP-Latent.myst.md @@ -436,7 +436,7 @@ plt.legend(loc=(0.32, 0.65), frameon=True); ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/gaussian_processes/GP-MeansAndCovs.ipynb b/examples/gaussian_processes/GP-MeansAndCovs.ipynb index 035b0190e..a8ef7e51f 100644 --- a/examples/gaussian_processes/GP-MeansAndCovs.ipynb +++ b/examples/gaussian_processes/GP-MeansAndCovs.ipynb @@ -42,13 +42,13 @@ }, "outputs": [], "source": [ - "import aesara\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.cm as cmap\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "import scipy.stats as stats" ] }, @@ -87,7 +87,7 @@ "tags": [] }, "source": [ - "A large set of mean and covariance functions are available in PyMC. It is relatively easy to define custom mean and covariance functions. Since PyMC uses Aesara, their gradients do not need to be defined by the user. \n", + "A large set of mean and covariance functions are available in PyMC. It is relatively easy to define custom mean and covariance functions. Since PyMC uses PyTensor, their gradients do not need to be defined by the user. \n", "\n", "## Mean functions\n", "\n", @@ -200,7 +200,7 @@ "tags": [] }, "source": [ - "As long as the shape matches the input it will receive, `gp.mean.Constant` can also accept a Aesara tensor or vector of PyMC random variables." + "As long as the shape matches the input it will receive, `gp.mean.Constant` can also accept a PyTensor tensor or vector of PyMC random variables." ] }, { @@ -320,7 +320,7 @@ "\n", "```\n", "\n", - "Remember that Aesara must be used instead of NumPy." + "Remember that PyTensor must be used instead of NumPy." 
] }, { @@ -2272,7 +2272,7 @@ "scipy : 1.9.3\n", "pymc : 4.3.0\n", "matplotlib: 3.6.2\n", - "aesara : 2.8.7\n", + "pytensor : 2.8.7\n", "\n", "Watermark: 2.3.0\n", "\n" diff --git a/examples/gaussian_processes/GP-MeansAndCovs.myst.md b/examples/gaussian_processes/GP-MeansAndCovs.myst.md index 460778c4a..c37af6606 100644 --- a/examples/gaussian_processes/GP-MeansAndCovs.myst.md +++ b/examples/gaussian_processes/GP-MeansAndCovs.myst.md @@ -33,13 +33,13 @@ papermill: status: completed tags: [] --- -import aesara -import aesara.tensor as at import arviz as az import matplotlib.cm as cmap import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor +import pytensor.tensor as at import scipy.stats as stats ``` @@ -62,7 +62,7 @@ plt.rcParams["figure.figsize"] = (10, 4) +++ {"papermill": {"duration": 0.037844, "end_time": "2020-12-22T18:36:31.751886", "exception": false, "start_time": "2020-12-22T18:36:31.714042", "status": "completed"}, "tags": []} -A large set of mean and covariance functions are available in PyMC. It is relatively easy to define custom mean and covariance functions. Since PyMC uses Aesara, their gradients do not need to be defined by the user. +A large set of mean and covariance functions are available in PyMC. It is relatively easy to define custom mean and covariance functions. Since PyMC uses PyTensor, their gradients do not need to be defined by the user. ## Mean functions @@ -117,7 +117,7 @@ print(const_func(X).eval()) +++ {"papermill": {"duration": 0.039627, "end_time": "2020-12-22T18:36:35.195057", "exception": false, "start_time": "2020-12-22T18:36:35.155430", "status": "completed"}, "tags": []} -As long as the shape matches the input it will receive, `gp.mean.Constant` can also accept a Aesara tensor or vector of PyMC random variables. +As long as the shape matches the input it will receive, `gp.mean.Constant` can also accept a PyTensor tensor or vector of PyMC random variables. ```{code-cell} ipython3 --- @@ -179,7 +179,7 @@ class Constant(pm.gp.mean.Mean): ``` -Remember that Aesara must be used instead of NumPy. +Remember that PyTensor must be used instead of NumPy. 
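For instance, a minimal sketch (editorial, not part of this patch) of a custom mean function written with PyTensor operations might look like the following; the `LinearTrend` name and its parametrization are hypothetical.

```python
import numpy as np
import pymc as pm
import pytensor.tensor as at


class LinearTrend(pm.gp.mean.Mean):
    """Hypothetical mean function: a linear trend in the first input column."""

    def __init__(self, slope=0.0, intercept=0.0):
        self.slope = slope
        self.intercept = intercept

    def __call__(self, X):
        # X has shape (n, input_dim); return a length-n mean vector,
        # built from PyTensor operations rather than NumPy ones
        return at.alloc(self.intercept, X.shape[0]) + self.slope * X[:, 0]


X = np.linspace(0, 10, 5)[:, None]
print(LinearTrend(slope=0.5, intercept=1.0)(X).eval())
```

An instance of such a class can then be passed as `mean_func` to any of the GP implementations.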
+++ {"papermill": {"duration": 0.039306, "end_time": "2020-12-22T18:36:36.998649", "exception": false, "start_time": "2020-12-22T18:36:36.959343", "status": "completed"}, "tags": []} diff --git a/examples/gaussian_processes/MOGP-Coregion-Hadamard.ipynb b/examples/gaussian_processes/MOGP-Coregion-Hadamard.ipynb index 95c665886..ae2a399e4 100644 --- a/examples/gaussian_processes/MOGP-Coregion-Hadamard.ipynb +++ b/examples/gaussian_processes/MOGP-Coregion-Hadamard.ipynb @@ -32,12 +32,12 @@ "metadata": {}, "outputs": [], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "\n", "from pymc.gp.util import plot_gp_dist" ] @@ -1693,14 +1693,14 @@ "Python version : 3.9.12\n", "IPython version : 8.3.0\n", "\n", - "aesara: 2.8.6\n", + "pytensor: 2.8.6\n", "aeppl : 0.0.36\n", "xarray: 2022.3.0\n", "\n", "pymc : 4.2.1\n", "arviz : 0.13.0\n", "pandas : 1.4.2\n", - "aesara : 2.8.6\n", + "pytensor : 2.8.6\n", "numpy : 1.22.4\n", "matplotlib: 3.5.2\n", "\n", @@ -1711,7 +1711,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/gaussian_processes/MOGP-Coregion-Hadamard.myst.md b/examples/gaussian_processes/MOGP-Coregion-Hadamard.myst.md index 28f4ad5cc..30c2e0ebc 100644 --- a/examples/gaussian_processes/MOGP-Coregion-Hadamard.myst.md +++ b/examples/gaussian_processes/MOGP-Coregion-Hadamard.myst.md @@ -26,12 +26,12 @@ This notebook shows how to implement the **Intrinsic Coregionalization Model** ( The advantage of Multi-output Gaussian Processes is their capacity to simultaneously learn and infer many outputs which have the same source of uncertainty from inputs. In this example, we model the average spin rates of several pitchers in different games from a baseball dataset. 
```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at from pymc.gp.util import plot_gp_dist ``` @@ -351,7 +351,7 @@ This work is supported by 2022 [Google Summer of Codes](https://summerofcode.wit ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/gaussian_processes/gaussian_process.ipynb b/examples/gaussian_processes/gaussian_process.ipynb index 08f058df6..7ad01a480 100644 --- a/examples/gaussian_processes/gaussian_process.ipynb +++ b/examples/gaussian_processes/gaussian_process.ipynb @@ -37,11 +37,11 @@ } ], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "import seaborn as sns\n", "\n", "from xarray_einstats.stats import multivariate_normal\n", @@ -370,13 +370,13 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara : 2.7.7\n", + "pytensor : 2.7.7\n", "aeppl : 0.0.32\n", "xarray : 2022.6.0\n", "xarray_einstats: 0.4.0.dev1\n", "\n", "sys : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:07:06) [Clang 13.0.1 ]\n", - "aesara : 2.7.7\n", + "pytensor : 2.7.7\n", "pymc : 4.1.3\n", "seaborn : 0.11.2\n", "matplotlib: 3.5.2\n", @@ -390,7 +390,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray,xarray_einstats" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray,xarray_einstats" ] }, { diff --git a/examples/gaussian_processes/gaussian_process.myst.md b/examples/gaussian_processes/gaussian_process.myst.md index f33f17745..9f6e33913 100644 --- a/examples/gaussian_processes/gaussian_process.myst.md +++ b/examples/gaussian_processes/gaussian_process.myst.md @@ -26,11 +26,11 @@ Example of simple Gaussian Process fit, adapted from Stan's [example-models repo For illustrative and divulgative purposes, this example builds a Gaussian process from scratch. However, PyMC includes a {mod}`module dedicated to Gaussian Processes ` which is recommended instead of coding everything from scratch. 
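For orientation, a minimal sketch (editorial, with made-up data and priors) of the dedicated GP module mentioned above might look like this; note the observation-noise keyword is an assumption tied to the PyMC version this patch targets, and newer releases may call it `sigma`.

```python
import numpy as np
import pymc as pm

rng = np.random.default_rng(0)
X = np.linspace(0, 1, 30)[:, None]
y = np.sin(6 * X[:, 0]) + 0.1 * rng.standard_normal(30)

with pm.Model() as model:
    ell = pm.Gamma("ell", alpha=2.0, beta=2.0)
    eta = pm.HalfNormal("eta", sigma=1.0)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell)

    gp = pm.gp.Marginal(cov_func=cov)
    sigma = pm.HalfNormal("sigma", sigma=1.0)
    # observation-noise keyword assumed to be `noise` for this PyMC version
    y_ = gp.marginal_likelihood("y", X=X, y=y, noise=sigma)

    idata = pm.sample(500, tune=500, chains=2)
```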
```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor.tensor as at import seaborn as sns from xarray_einstats.stats import multivariate_normal @@ -182,7 +182,7 @@ ax.plot(x, y, "r."); ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray,xarray_einstats +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray,xarray_einstats ``` :::{include} ../page_footer.md diff --git a/examples/generalized_linear_models/GLM-binomial-regression.ipynb b/examples/generalized_linear_models/GLM-binomial-regression.ipynb index 1257ccc17..5c7504122 100644 --- a/examples/generalized_linear_models/GLM-binomial-regression.ipynb +++ b/examples/generalized_linear_models/GLM-binomial-regression.ipynb @@ -647,7 +647,7 @@ "Python version : 3.9.9\n", "IPython version : 7.31.0\n", "\n", - "aesara: 2.3.2\n", + "pytensor: 2.3.2\n", "aeppl : 0.0.18\n", "\n", "arviz : 0.11.4\n", @@ -663,7 +663,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl" + "%watermark -n -u -v -iv -w -p pytensor,aeppl" ] }, { diff --git a/examples/generalized_linear_models/GLM-binomial-regression.myst.md b/examples/generalized_linear_models/GLM-binomial-regression.myst.md index c820cec50..b3a0ddc01 100644 --- a/examples/generalized_linear_models/GLM-binomial-regression.myst.md +++ b/examples/generalized_linear_models/GLM-binomial-regression.myst.md @@ -253,7 +253,7 @@ A good introduction to generalized linear models is provided by {cite:t}`roback2 :tags: [] %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl +%watermark -n -u -v -iv -w -p pytensor,aeppl ``` :::{include} ../page_footer.md ::: diff --git a/examples/generalized_linear_models/GLM-hierarchical-binomial-model.ipynb b/examples/generalized_linear_models/GLM-hierarchical-binomial-model.ipynb index d54515c90..1c6c5a2e7 100644 --- a/examples/generalized_linear_models/GLM-hierarchical-binomial-model.ipynb +++ b/examples/generalized_linear_models/GLM-hierarchical-binomial-model.ipynb @@ -18,12 +18,12 @@ "metadata": {}, "outputs": [], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "\n", "from scipy.special import gammaln" ] @@ -907,7 +907,7 @@ "IPython version : 8.3.0\n", "\n", "arviz : 0.12.1\n", - "aesara : 2.6.6\n", + "pytensor : 2.6.6\n", "sys : 3.9.7 (default, Sep 16 2021, 13:09:58) \n", "[GCC 7.5.0]\n", "numpy : 1.21.5\n", diff --git a/examples/generalized_linear_models/GLM-hierarchical-binomial-model.myst.md b/examples/generalized_linear_models/GLM-hierarchical-binomial-model.myst.md index 15e9edb7f..03e2288c2 100644 --- a/examples/generalized_linear_models/GLM-hierarchical-binomial-model.myst.md +++ b/examples/generalized_linear_models/GLM-hierarchical-binomial-model.myst.md @@ -18,12 +18,12 @@ kernelspec: ::: ```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor.tensor as at from scipy.special import gammaln ``` diff --git a/examples/generalized_linear_models/GLM-negative-binomial-regression.ipynb b/examples/generalized_linear_models/GLM-negative-binomial-regression.ipynb index bdade94e1..0c9065538 100644 --- a/examples/generalized_linear_models/GLM-negative-binomial-regression.ipynb +++ 
b/examples/generalized_linear_models/GLM-negative-binomial-regression.ipynb @@ -848,7 +848,7 @@ "Python version : 3.9.12\n", "IPython version : 8.2.0\n", "\n", - "aesara: 2.6.6\n", + "pytensor: 2.6.6\n", "aeppl : 0.0.31\n", "xarray: 0.20.1\n", "\n", @@ -868,7 +868,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/generalized_linear_models/GLM-negative-binomial-regression.myst.md b/examples/generalized_linear_models/GLM-negative-binomial-regression.myst.md index 1ff6a7001..07ffad142 100644 --- a/examples/generalized_linear_models/GLM-negative-binomial-regression.myst.md +++ b/examples/generalized_linear_models/GLM-negative-binomial-regression.myst.md @@ -232,7 +232,7 @@ See also, [`bambi's` negative binomial example](https://bambinos.github.io/bambi ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/generalized_linear_models/GLM-poisson-regression.ipynb b/examples/generalized_linear_models/GLM-poisson-regression.ipynb index 1b8bf9540..c7c6c63bb 100644 --- a/examples/generalized_linear_models/GLM-poisson-regression.ipynb +++ b/examples/generalized_linear_models/GLM-poisson-regression.ipynb @@ -1597,7 +1597,7 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.7.5\n", + "pytensor: 2.7.5\n", "aeppl : 0.0.32\n", "\n", "matplotlib: 3.5.2\n", @@ -1615,7 +1615,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl" + "%watermark -n -u -v -iv -w -p pytensor,aeppl" ] }, { diff --git a/examples/generalized_linear_models/GLM-poisson-regression.myst.md b/examples/generalized_linear_models/GLM-poisson-regression.myst.md index 7ddb4e14d..506944180 100644 --- a/examples/generalized_linear_models/GLM-poisson-regression.myst.md +++ b/examples/generalized_linear_models/GLM-poisson-regression.myst.md @@ -527,7 +527,7 @@ papermill: tags: [] --- %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl +%watermark -n -u -v -iv -w -p pytensor,aeppl ``` :::{include} ../page_footer.md diff --git a/examples/generalized_linear_models/GLM-robust.ipynb b/examples/generalized_linear_models/GLM-robust.ipynb index 690983479..509bb93b8 100644 --- a/examples/generalized_linear_models/GLM-robust.ipynb +++ b/examples/generalized_linear_models/GLM-robust.ipynb @@ -57,13 +57,13 @@ "source": [ "%matplotlib inline\n", "\n", - "import aesara\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import pymc as pm\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "import xarray as xr" ] }, @@ -549,7 +549,7 @@ "xarray: 2022.12.0\n", "\n", "matplotlib: 3.6.2\n", - "aesara : 2.8.7\n", + "pytensor : 2.8.7\n", "numpy : 1.21.6\n", "xarray : 2022.12.0\n", "pymc : 4.4.0\n", diff --git a/examples/generalized_linear_models/GLM-robust.myst.md b/examples/generalized_linear_models/GLM-robust.myst.md index ec3983d51..397fc2d1a 100644 --- a/examples/generalized_linear_models/GLM-robust.myst.md +++ b/examples/generalized_linear_models/GLM-robust.myst.md @@ -47,13 +47,13 @@ First, let's import our modules. 
```{code-cell} ipython3 %matplotlib inline -import aesara -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pandas as pd import pymc as pm +import pytensor +import pytensor.tensor as at import xarray as xr ``` diff --git a/examples/generalized_linear_models/GLM-rolling-regression.ipynb b/examples/generalized_linear_models/GLM-rolling-regression.ipynb index 4e3eb6fa8..8bc1eaa0f 100644 --- a/examples/generalized_linear_models/GLM-rolling-regression.ipynb +++ b/examples/generalized_linear_models/GLM-rolling-regression.ipynb @@ -676,7 +676,7 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.7.5\n", + "pytensor: 2.7.5\n", "aeppl : 0.0.32\n", "xarray: 2022.3.0\n", "\n", @@ -694,7 +694,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/generalized_linear_models/GLM-rolling-regression.myst.md b/examples/generalized_linear_models/GLM-rolling-regression.myst.md index 39545c4c5..8e22383f8 100644 --- a/examples/generalized_linear_models/GLM-rolling-regression.myst.md +++ b/examples/generalized_linear_models/GLM-rolling-regression.myst.md @@ -257,7 +257,7 @@ cb.ax.set_yticklabels(ticklabels); ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb b/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb index 96f2ed979..caa239273 100644 --- a/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb +++ b/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb @@ -475,8 +475,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [sigma, β0, β1]\n" ] @@ -903,8 +903,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. 
You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [sigma, β0, β1]\n" ] @@ -1452,8 +1452,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [intercept_mu, intercept_sigma, slope_mu, slope_sigma, sigma_hyperprior, sigma, β0_offset, β1_offset]\n" ] @@ -1767,7 +1767,7 @@ "Python version : 3.9.9\n", "IPython version : 7.31.0\n", "\n", - "aesara: 2.5.1\n", + "pytensor: 2.5.1\n", "aeppl : 0.0.27\n", "\n", "xarray : 0.20.2\n", @@ -1784,7 +1784,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl" + "%watermark -n -u -v -iv -w -p pytensor,aeppl" ] }, { diff --git a/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md b/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md index e90e2a013..47a1c6b3a 100644 --- a/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md +++ b/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md @@ -551,7 +551,7 @@ If you are interested in learning more, there are a number of other [PyMC exampl ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl +%watermark -n -u -v -iv -w -p pytensor,aeppl ``` :::{include} ../page_footer.md diff --git a/examples/generalized_linear_models/GLM-truncated-censored-regression.ipynb b/examples/generalized_linear_models/GLM-truncated-censored-regression.ipynb index d9da3da9c..f937f7222 100644 --- a/examples/generalized_linear_models/GLM-truncated-censored-regression.ipynb +++ b/examples/generalized_linear_models/GLM-truncated-censored-regression.ipynb @@ -955,7 +955,7 @@ "Python version : 3.10.6\n", "IPython version : 8.5.0\n", "\n", - "aesara: 2.8.2\n", + "pytensor: 2.8.2\n", "aeppl : 0.0.35\n", "\n", "arviz : 0.12.1\n", @@ -971,7 +971,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl" + "%watermark -n -u -v -iv -w -p pytensor,aeppl" ] }, { diff --git a/examples/generalized_linear_models/GLM-truncated-censored-regression.myst.md b/examples/generalized_linear_models/GLM-truncated-censored-regression.myst.md index 57000232e..4138a3cfc 100644 --- a/examples/generalized_linear_models/GLM-truncated-censored-regression.myst.md +++ b/examples/generalized_linear_models/GLM-truncated-censored-regression.myst.md @@ -376,7 +376,7 @@ When looking into this topic, I found that most of the material out there focuse ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl +%watermark -n -u -v -iv -w -p pytensor,aeppl ``` :::{include} ../page_footer.md diff --git a/examples/howto/api_quickstart.ipynb b/examples/howto/api_quickstart.ipynb index 0954cd181..b8fe62b32 100644 --- a/examples/howto/api_quickstart.ipynb +++ b/examples/howto/api_quickstart.ipynb @@ -29,11 +29,11 @@ } ], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import pymc as pm" + "import pymc as pm\n", + "import pytensor.tensor as at" ] }, { @@ -291,7 +291,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`observed` supports lists, `numpy.ndarray` and `aesara` data structures." + "`observed` supports lists, `numpy.ndarray` and `pytensor` data structures." ] }, { @@ -1732,9 +1732,9 @@ "source": [ "## 4.1 Predicting on hold-out data\n", "\n", - "In many cases you want to predict on unseen / hold-out data. This is especially relevant in Probabilistic Machine Learning and Bayesian Deep Learning. 
PyMC includes a `pm.MutableData` container to help with such uses. It is a wrapper around a `aesara.shared` variable and allows the values of the data to be changed later. Otherwise, `pm.MutableData` objects can be used just like any other numpy array or tensor.\n", + "In many cases you want to predict on unseen / hold-out data. This is especially relevant in Probabilistic Machine Learning and Bayesian Deep Learning. PyMC includes a `pm.MutableData` container to help with such uses. It is a wrapper around a `pytensor.shared` variable and allows the values of the data to be changed later. Otherwise, `pm.MutableData` objects can be used just like any other numpy array or tensor.\n", "\n", - "This distinction is significant since internally all models in PyMC are giant symbolic expressions. When you pass raw data directly into a model, you are giving Aesara permission to treat this data as a constant and optimize it away if doing so makes sense. If you need to change this data later you may not have any way to point at it within the larger symbolic expression. Using `pm.MutableData` offers a way to point to a specific place in the symbolic expression and change what is there." + "This distinction is significant since internally all models in PyMC are giant symbolic expressions. When you pass raw data directly into a model, you are giving PyTensor permission to treat this data as a constant and optimize it away if doing so makes sense. If you need to change this data later you may not have any way to point at it within the larger symbolic expression. Using `pm.MutableData` offers a way to point to a specific place in the symbolic expression and change what is there." ] }, { @@ -2292,14 +2292,14 @@ "Python version : 3.9.13\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.6.2\n", + "pytensor: 2.6.2\n", "aeppl : 0.0.31\n", "xarray: 2022.3.0\n", "\n", "arviz : 0.12.1\n", "numpy : 1.22.4\n", "pymc : 4.0.0b6\n", - "aesara : 2.6.2\n", + "pytensor : 2.6.2\n", "matplotlib: 3.5.2\n", "\n", "Watermark: 2.3.1\n", @@ -2309,7 +2309,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/howto/api_quickstart.myst.md b/examples/howto/api_quickstart.myst.md index 6b011e3dd..65069c64c 100644 --- a/examples/howto/api_quickstart.myst.md +++ b/examples/howto/api_quickstart.myst.md @@ -21,11 +21,11 @@ kernelspec: ::: ```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor.tensor as at ``` ```{code-cell} ipython3 @@ -121,7 +121,7 @@ with pm.Model(): obs = pm.Normal("x", mu=0, sigma=1, observed=rng.standard_normal(100)) ``` -`observed` supports lists, `numpy.ndarray` and `aesara` data structures. +`observed` supports lists, `numpy.ndarray` and `pytensor` data structures. +++ @@ -400,9 +400,9 @@ ax.legend(fontsize=10); ## 4.1 Predicting on hold-out data -In many cases you want to predict on unseen / hold-out data. This is especially relevant in Probabilistic Machine Learning and Bayesian Deep Learning. PyMC includes a `pm.MutableData` container to help with such uses. It is a wrapper around a `aesara.shared` variable and allows the values of the data to be changed later. Otherwise, `pm.MutableData` objects can be used just like any other numpy array or tensor. +In many cases you want to predict on unseen / hold-out data. 
This is especially relevant in Probabilistic Machine Learning and Bayesian Deep Learning. PyMC includes a `pm.MutableData` container to help with such uses. It is a wrapper around a `pytensor.shared` variable and allows the values of the data to be changed later. Otherwise, `pm.MutableData` objects can be used just like any other numpy array or tensor. -This distinction is significant since internally all models in PyMC are giant symbolic expressions. When you pass raw data directly into a model, you are giving Aesara permission to treat this data as a constant and optimize it away if doing so makes sense. If you need to change this data later you may not have any way to point at it within the larger symbolic expression. Using `pm.MutableData` offers a way to point to a specific place in the symbolic expression and change what is there. +This distinction is significant since internally all models in PyMC are giant symbolic expressions. When you pass raw data directly into a model, you are giving PyTensor permission to treat this data as a constant and optimize it away if doing so makes sense. If you need to change this data later you may not have any way to point at it within the larger symbolic expression. Using `pm.MutableData` offers a way to point to a specific place in the symbolic expression and change what is there. ```{code-cell} ipython3 x = rng.standard_normal(100) @@ -449,7 +449,7 @@ idata.posterior_predictive["obs"].mean(dim=["draw", "chain"]) ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/howto/custom_distribution.ipynb b/examples/howto/custom_distribution.ipynb index a27888fcc..855f93b21 100644 --- a/examples/howto/custom_distribution.ipynb +++ b/examples/howto/custom_distribution.ipynb @@ -87,7 +87,7 @@ "\n", "where $\\theta > 0$ and $\\max(-1, -\\frac{\\theta}{4}) \\leq \\lambda \\leq 1$\n", "\n", - "We now define the log probability function, which is an implementation of the above formula using just Aesara operations.\n", + "We now define the log probability function, which is an implementation of the above formula using just PyTensor operations.\n", "\n", "Parameters:\n", "- `theta`: $\\theta$\n", diff --git a/examples/howto/custom_distribution.myst.md b/examples/howto/custom_distribution.myst.md index f1834163d..53c509346 100644 --- a/examples/howto/custom_distribution.myst.md +++ b/examples/howto/custom_distribution.myst.md @@ -74,7 +74,7 @@ $$\log f(y | \theta, \lambda) = \log\theta + \log\left((\theta + \lambda y)^{y-1 where $\theta > 0$ and $\max(-1, -\frac{\theta}{4}) \leq \lambda \leq 1$ -We now define the log probability function, which is an implementation of the above formula using just Aesara operations. +We now define the log probability function, which is an implementation of the above formula using just PyTensor operations. Parameters: - `theta`: $\theta$ diff --git a/examples/howto/howto_debugging.ipynb b/examples/howto/howto_debugging.ipynb index 9ef4560f5..7fba7f357 100644 --- a/examples/howto/howto_debugging.ipynb +++ b/examples/howto/howto_debugging.ipynb @@ -8,7 +8,7 @@ "# How to debug a model\n", "\n", ":::{post} August 2, 2022\n", - ":tags: debugging, Aesara\n", + ":tags: debugging, PyTensor\n", ":category: beginner\n", ":author: Thomas Wiecki, Igor Kuvychko\n", ":::" @@ -21,7 +21,7 @@ "## Introduction\n", "There are various levels on which to debug a model. 
One of the simplest is to just print out the values that different variables are taking on.\n", "\n", - "Because `PyMC` uses `Aesara` expressions to build the model, and not functions, there is no way to place a `print` statement into a likelihood function. Instead, you can use the `aesara.printing.Print` class to print intermediate values." + "Because `PyMC` uses `PyTensor` expressions to build the model, and not functions, there is no way to place a `print` statement into a likelihood function. Instead, you can use the `pytensor.printing.Print` class to print intermediate values." ] }, { @@ -53,8 +53,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### How to print intermediate values of `Aesara` functions\n", - "Since `Aesara` functions are compiled to C, you have to use `aesara.printing.Print` class to print intermediate values (imported below as `Print`). Python `print` function will not work. Below is a simple example of using `Print`. For more information, see {ref}`Debugging Aesara `." + "### How to print intermediate values of `PyTensor` functions\n", + "Since `PyTensor` functions are compiled to C, you have to use `pytensor.printing.Print` class to print intermediate values (imported below as `Print`). Python `print` function will not work. Below is a simple example of using `Print`. For more information, see {ref}`Debugging PyTensor `." ] }, { @@ -63,10 +63,10 @@ "metadata": {}, "outputs": [], "source": [ - "import aesara.tensor as at\n", + "import pytensor.tensor as at\n", "\n", - "from aesara import function\n", - "from aesara.printing import Print" + "from pytensor import function\n", + "from pytensor.printing import Print" ] }, { @@ -517,11 +517,11 @@ "Python version : 3.10.5\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.7.5\n", + "pytensor: 2.7.5\n", "xarray: 2022.3.0\n", "\n", "matplotlib: 3.5.2\n", - "aesara : 2.7.5\n", + "pytensor : 2.7.5\n", "numpy : 1.23.0\n", "arviz : 0.12.1\n", "sys : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 06:57:19) [MSC v.1929 64 bit (AMD64)]\n", @@ -536,7 +536,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,xarray" + "%watermark -n -u -v -iv -w -p pytensor,xarray" ] }, { diff --git a/examples/howto/howto_debugging.myst.md b/examples/howto/howto_debugging.myst.md index a45512b27..72c12391b 100644 --- a/examples/howto/howto_debugging.myst.md +++ b/examples/howto/howto_debugging.myst.md @@ -14,7 +14,7 @@ kernelspec: # How to debug a model :::{post} August 2, 2022 -:tags: debugging, Aesara +:tags: debugging, PyTensor :category: beginner :author: Thomas Wiecki, Igor Kuvychko ::: @@ -24,7 +24,7 @@ kernelspec: ## Introduction There are various levels on which to debug a model. One of the simplest is to just print out the values that different variables are taking on. -Because `PyMC` uses `Aesara` expressions to build the model, and not functions, there is no way to place a `print` statement into a likelihood function. Instead, you can use the `aesara.printing.Print` class to print intermediate values. +Because `PyMC` uses `PyTensor` expressions to build the model, and not functions, there is no way to place a `print` statement into a likelihood function. Instead, you can use the `pytensor.printing.Print` class to print intermediate values. 
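As a minimal sketch of how `Print` is used (illustrative code with made-up variable names, not taken from the notebook above), you instantiate the Op with a message and wrap it around the intermediate tensor whose value you want to see; the message is then emitted every time the compiled function is evaluated:

```python
import numpy as np
import pytensor
import pytensor.tensor as at
from pytensor.printing import Print

x = at.dvector("x")
# Wrap an intermediate result so its value is printed on every evaluation.
x_exp_printed = Print("exp(x) =")(at.exp(x))
f = pytensor.function([x], x_exp_printed.sum())
f(np.array([0.0, 1.0]))  # prints something like: exp(x) = __str__ = [1.  2.71828183]
```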
```{code-cell} ipython3 import arviz as az @@ -41,14 +41,14 @@ import pymc as pm RANDOM_SEED = 8927 ``` -### How to print intermediate values of `Aesara` functions -Since `Aesara` functions are compiled to C, you have to use `aesara.printing.Print` class to print intermediate values (imported below as `Print`). Python `print` function will not work. Below is a simple example of using `Print`. For more information, see {ref}`Debugging Aesara `. +### How to print intermediate values of `PyTensor` functions +Since `PyTensor` functions are compiled to C, you have to use `pytensor.printing.Print` class to print intermediate values (imported below as `Print`). Python `print` function will not work. Below is a simple example of using `Print`. For more information, see {ref}`Debugging PyTensor `. ```{code-cell} ipython3 -import aesara.tensor as at +import pytensor.tensor as at -from aesara import function -from aesara.printing import Print +from pytensor import function +from pytensor.printing import Print ``` ```{code-cell} ipython3 @@ -190,7 +190,7 @@ output.shape ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,xarray +%watermark -n -u -v -iv -w -p pytensor,xarray ``` :::{include} ../page_footer.md diff --git a/examples/howto/lasso_block_update.ipynb b/examples/howto/lasso_block_update.ipynb index d5daeeec7..0927b9a96 100644 --- a/examples/howto/lasso_block_update.ipynb +++ b/examples/howto/lasso_block_update.ipynb @@ -293,7 +293,7 @@ "Python version : 3.9.10\n", "IPython version : 8.0.1\n", "\n", - "aesara: 2.3.2\n", + "pytensor: 2.3.2\n", "aeppl : 0.0.18\n", "xarray: 2022.3.0\n", "\n", @@ -309,7 +309,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/howto/lasso_block_update.myst.md b/examples/howto/lasso_block_update.myst.md index 9a993f35d..6c918bef3 100644 --- a/examples/howto/lasso_block_update.myst.md +++ b/examples/howto/lasso_block_update.myst.md @@ -116,7 +116,7 @@ az.plot_pair( :tags: [] %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/howto/sampling_compound_step.ipynb b/examples/howto/sampling_compound_step.ipynb index 317e42e17..698da2b0f 100644 --- a/examples/howto/sampling_compound_step.ipynb +++ b/examples/howto/sampling_compound_step.ipynb @@ -36,10 +36,10 @@ }, "outputs": [], "source": [ - "import aesara\n", "import arviz as az\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor\n", "import xarray" ] }, @@ -162,7 +162,7 @@ } ], "source": [ - "n_ = aesara.shared(np.asarray([10, 15]))\n", + "n_ = pytensor.shared(np.asarray([10, 15]))\n", "with pm.Model() as m:\n", " p = pm.Beta(\"p\", 1.0, 1.0)\n", " ni = pm.Bernoulli(\"ni\", 0.5)\n", @@ -804,7 +804,7 @@ "IPython version : 7.30.1\n", "\n", "pymc : 4.0.0b1\n", - "aesara: 2.3.2\n", + "pytensor: 2.3.2\n", "arviz : 0.11.4\n", "xarray: 0.18.2\n", "numpy : 1.21.1\n", diff --git a/examples/howto/sampling_compound_step.myst.md b/examples/howto/sampling_compound_step.myst.md index bbdca1a05..70bba9b81 100644 --- a/examples/howto/sampling_compound_step.myst.md +++ b/examples/howto/sampling_compound_step.myst.md @@ -20,10 +20,10 @@ This notebook explains how the compound steps work in `pymc.sample` function whe - What happens to sample statistics that occur in multiple step methods? 
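The questions above are easiest to follow with a model that mixes continuous and discrete variables in hand. As a hedged aside (an illustrative sketch, not code from this notebook), the compound step that PyMC assembles automatically can also be requested explicitly by passing a list of step methods to `pm.sample`:

```python
import pymc as pm

with pm.Model():
    p = pm.Beta("p", 1.0, 1.0)  # continuous variable, usually assigned to NUTS
    k = pm.Bernoulli("k", 0.5)  # discrete variable, usually assigned to BinaryGibbsMetropolis
    pm.Binomial("y", n=20, p=p, observed=9)  # k stays unconnected and is simply drawn from its prior

    # Passing a list of step methods makes PyMC combine them into a CompoundStep,
    # mirroring what the automatic assignment would do for this model.
    steps = [pm.NUTS([p]), pm.BinaryGibbsMetropolis([k])]
    idata = pm.sample(500, step=steps, chains=2)
```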
```{code-cell} ipython3 -import aesara import arviz as az import numpy as np import pymc as pm +import pytensor import xarray ``` @@ -60,7 +60,7 @@ To conduct Markov chain Monte Carlo (MCMC) sampling to generate posterior sample When we call `pm.sample(return_inferencedata=False)`, `PyMC` assigns the best step method to each of the free random variables. Take the following example ```{code-cell} ipython3 -n_ = aesara.shared(np.asarray([10, 15])) +n_ = pytensor.shared(np.asarray([10, 15])) with pm.Model() as m: p = pm.Beta("p", 1.0, 1.0) ni = pm.Bernoulli("ni", 0.5) diff --git a/examples/mixture_models/gaussian_mixture_model.ipynb b/examples/mixture_models/gaussian_mixture_model.ipynb index 8c2b1b2a4..1b70c20b4 100644 --- a/examples/mixture_models/gaussian_mixture_model.ipynb +++ b/examples/mixture_models/gaussian_mixture_model.ipynb @@ -236,8 +236,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc-dev-py39/lib/python3.9/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [μ, σ, w]\n", "ld: unsupported tapi file type '!tapi-tbd' in YAML file '/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib/libSystem.tbd' for architecture x86_64\n", @@ -421,7 +421,7 @@ "Python version : 3.9.12\n", "IPython version : 8.2.0\n", "\n", - "aesara : 2.5.1\n", + "pytensor : 2.5.1\n", "aeppl : 0.0.27\n", "xarray : 0.20.1\n", "xarray_einstats: 0.2.2\n", @@ -439,7 +439,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray,xarray_einstats" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray,xarray_einstats" ] }, { diff --git a/examples/mixture_models/gaussian_mixture_model.myst.md b/examples/mixture_models/gaussian_mixture_model.myst.md index fc8c668b0..79e58a378 100644 --- a/examples/mixture_models/gaussian_mixture_model.myst.md +++ b/examples/mixture_models/gaussian_mixture_model.myst.md @@ -116,7 +116,7 @@ ax[2].set(title="Group membership", xlabel="x", ylabel="Probability"); ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray,xarray_einstats +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray,xarray_einstats ``` :::{include} ../page_footer.md diff --git a/examples/samplers/MLDA_introduction.ipynb b/examples/samplers/MLDA_introduction.ipynb index d55540c72..74fb37a95 100644 --- a/examples/samplers/MLDA_introduction.ipynb +++ b/examples/samplers/MLDA_introduction.ipynb @@ -65,7 +65,7 @@ "\n", "[Variance reduction 1](./MLDA_variance_reduction_linear_regression.ipynb) and [Variance reduction 2](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_variance_reduction_groundwater.ipynb) (external link): Those two notebooks demonstrate the variance reduction feature in a linear regression model and a groundwater flow model. This feature allows the user to define a quantity of interest that they need to estimate using the MCMC samples. It then collects those quantities of interest, as well as differences of these quantities between levels, during MLDA sampling. The collected quentities can then be used to produce an estimate which has lower variance than a standard estimate that uses samples from the fine chain only. The first notebook does not have external dependencies, while the second one requires FEniCS. Note that the second notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency.\n", "\n", - "[Adaptive error model](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_adaptive_error_model.ipynb) (external link): In this notebook we use MLDA to tackle another inverse problem; groundwarer flow modeling. The aim is to infer the posterior distribution of model parameters (hydraulic conductivity) given data (measurements of hydraulic head). In this example we make use of Aesara Ops in order to define a \"black box\" likelihood, i.e. a likelihood that uses external code. Specifically, our likelihood uses the [FEniCS](https://fenicsproject.org/) library to solve a PDE. This is a common scenario, as PDEs of this type are slow to solve with scipy or other standard libraries. Note that this notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency. 
We employ the adaptive error model (AEM) feature and compare the performance of basic MLDA with AEM-enhanced MLDA. The idea of Adaptive Error Model (AEM) is to estimate the mean and variance of the forward-model error between adjacent levels, i.e. estimate the bias of the coarse forward model compared to the fine forward model, and use those estimates to correct the coarse model. Using the technique should improve ESS/sec on the fine level.\n", + "[Adaptive error model](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_adaptive_error_model.ipynb) (external link): In this notebook we use MLDA to tackle another inverse problem; groundwater flow modeling. The aim is to infer the posterior distribution of model parameters (hydraulic conductivity) given data (measurements of hydraulic head). In this example we make use of PyTensor Ops in order to define a \"black box\" likelihood, i.e. a likelihood that uses external code. Specifically, our likelihood uses the [FEniCS](https://fenicsproject.org/) library to solve a PDE. This is a common scenario, as PDEs of this type are slow to solve with scipy or other standard libraries. Note that this notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency. We employ the adaptive error model (AEM) feature and compare the performance of basic MLDA with AEM-enhanced MLDA. The idea of Adaptive Error Model (AEM) is to estimate the mean and variance of the forward-model error between adjacent levels, i.e. estimate the bias of the coarse forward model compared to the fine forward model, and use those estimates to correct the coarse model. Using the technique should improve ESS/sec on the fine level.\n", "\n", "[Benchmarks and tuning](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_benchmarks_tuning.ipynb) (external link): In this notebook we benchmark MLDA against other samplers using different parameterizations of the groundwater flow model. We also give some advice on tuning MLDA. Note that this notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency." ] diff --git a/examples/samplers/MLDA_introduction.myst.md b/examples/samplers/MLDA_introduction.myst.md index 0824a7c48..0289556db 100644 --- a/examples/samplers/MLDA_introduction.myst.md +++ b/examples/samplers/MLDA_introduction.myst.md @@ -59,7 +59,7 @@ Please note that the MLDA sampler is new in PyMC. The user should be extra criti [Variance reduction 1](./MLDA_variance_reduction_linear_regression.ipynb) and [Variance reduction 2](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_variance_reduction_groundwater.ipynb) (external link): Those two notebooks demonstrate the variance reduction feature in a linear regression model and a groundwater flow model. This feature allows the user to define a quantity of interest that they need to estimate using the MCMC samples. It then collects those quantities of interest, as well as differences of these quantities between levels, during MLDA sampling. The collected quentities can then be used to produce an estimate which has lower variance than a standard estimate that uses samples from the fine chain only. The first notebook does not have external dependencies, while the second one requires FEniCS. Note that the second notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency.
-[Adaptive error model](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_adaptive_error_model.ipynb) (external link): In this notebook we use MLDA to tackle another inverse problem; groundwarer flow modeling. The aim is to infer the posterior distribution of model parameters (hydraulic conductivity) given data (measurements of hydraulic head). In this example we make use of Aesara Ops in order to define a "black box" likelihood, i.e. a likelihood that uses external code. Specifically, our likelihood uses the [FEniCS](https://fenicsproject.org/) library to solve a PDE. This is a common scenario, as PDEs of this type are slow to solve with scipy or other standard libraries. Note that this notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency. We employ the adaptive error model (AEM) feature and compare the performance of basic MLDA with AEM-enhanced MLDA. The idea of Adaptive Error Model (AEM) is to estimate the mean and variance of the forward-model error between adjacent levels, i.e. estimate the bias of the coarse forward model compared to the fine forward model, and use those estimates to correct the coarse model. Using the technique should improve ESS/sec on the fine level.
+[Adaptive error model](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_adaptive_error_model.ipynb) (external link): In this notebook we use MLDA to tackle another inverse problem; groundwater flow modeling. The aim is to infer the posterior distribution of model parameters (hydraulic conductivity) given data (measurements of hydraulic head). In this example we make use of PyTensor Ops in order to define a "black box" likelihood, i.e. a likelihood that uses external code. Specifically, our likelihood uses the [FEniCS](https://fenicsproject.org/) library to solve a PDE. This is a common scenario, as PDEs of this type are slow to solve with scipy or other standard libraries. Note that this notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency. We employ the adaptive error model (AEM) feature and compare the performance of basic MLDA with AEM-enhanced MLDA. The idea of Adaptive Error Model (AEM) is to estimate the mean and variance of the forward-model error between adjacent levels, i.e. estimate the bias of the coarse forward model compared to the fine forward model, and use those estimates to correct the coarse model. Using the technique should improve ESS/sec on the fine level.

[Benchmarks and tuning](https://github.com/alan-turing-institute/pymc/blob/mlda_all_notebooks/docs/source/notebooks/MLDA_benchmarks_tuning.ipynb) (external link): In this notebook we benchmark MLDA against other samplers using different parameterizations of the groundwater flow model. We also give some advice on tuning MLDA. Note that this notebook is outside the core PyMC repository because FEniCS is not a PyMC dependency.
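Because the linked notebook lives outside this repository, a rough sketch of what a PyTensor Op wrapping a black-box likelihood looks like may be useful here (illustrative only; the quadratic `external_loglike` below stands in for an expensive external solver such as a FEniCS PDE solve and is not code from that notebook):

```python
import numpy as np
import pytensor.tensor as at
from pytensor.graph.basic import Apply
from pytensor.graph.op import Op


def external_loglike(theta, data):
    # Stand-in for external code (e.g. a PDE solver) that returns a log-likelihood.
    return -0.5 * float(np.sum((data - theta) ** 2))


class BlackBoxLogLike(Op):
    """Wrap an arbitrary Python log-likelihood so it can sit in a PyTensor graph."""

    def make_node(self, theta, data):
        theta = at.as_tensor_variable(theta)
        data = at.as_tensor_variable(data)
        return Apply(self, [theta, data], [at.dscalar()])

    def perform(self, node, inputs, output_storage):
        theta, data = inputs
        output_storage[0][0] = np.array(external_loglike(theta, data))


# Inside a PyMC model this would typically enter the graph via a Potential, e.g.
# pm.Potential("loglike", BlackBoxLogLike()(theta, observed_data)).
# No grad() is defined here, so gradient-based samplers such as NUTS cannot use it directly.
```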
diff --git a/examples/samplers/SMC2_gaussians.ipynb b/examples/samplers/SMC2_gaussians.ipynb index 373924688..9c9677a66 100644 --- a/examples/samplers/SMC2_gaussians.ipynb +++ b/examples/samplers/SMC2_gaussians.ipynb @@ -26,10 +26,10 @@ } ], "source": [ - "import aesara.tensor as at\n", "import arviz as az\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor.tensor as at\n", "\n", "print(f\"Running on PyMC v{pm.__version__}\")" ] @@ -457,7 +457,7 @@ "[GCC 7.5.0]\n", "arviz : 0.12.0\n", "numpy : 1.21.5\n", - "aesara: 2.6.2\n", + "pytensor: 2.6.2\n", "pymc : 4.0.0b6\n", "\n", "Watermark: 2.3.0\n", diff --git a/examples/samplers/SMC2_gaussians.myst.md b/examples/samplers/SMC2_gaussians.myst.md index dc12d5355..ed8be2d29 100644 --- a/examples/samplers/SMC2_gaussians.myst.md +++ b/examples/samplers/SMC2_gaussians.myst.md @@ -18,10 +18,10 @@ kernelspec: ::: ```{code-cell} ipython3 -import aesara.tensor as at import arviz as az import numpy as np import pymc as pm +import pytensor.tensor as at print(f"Running on PyMC v{pm.__version__}") ``` diff --git a/examples/survival_analysis/censored_data.ipynb b/examples/survival_analysis/censored_data.ipynb index ebc152a7b..ccb74c2ba 100644 --- a/examples/survival_analysis/censored_data.ipynb +++ b/examples/survival_analysis/censored_data.ipynb @@ -182,8 +182,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [mu, sigma]\n" ] @@ -297,8 +297,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . 
Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [mu, sigma]\n" ] @@ -420,8 +420,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [mu, sigma, right_censored, left_censored]\n" ] @@ -563,8 +563,8 @@ "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", - "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/aesaraf.py:1005: UserWarning: The parameter 'updates' of aesara.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", - " aesara_function = aesara.function(\n", + "/Users/benjamv/opt/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/pytensorf.py:1005: UserWarning: The parameter 'updates' of pytensor.function() expects an OrderedDict, got . Using a standard dictionary here results in non-deterministic behavior. You should use an OrderedDict if you are using Python 2.7 (collections.OrderedDict for older python), or use a list of (shared, update) pairs. 
Do not just convert your dictionary to this type before the call as the conversion will still be non-deterministic.\n", + " pytensor_function = pytensor.function(\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [mu, sigma]\n" ] @@ -690,7 +690,7 @@ "Python version : 3.10.4\n", "IPython version : 8.3.0\n", "\n", - "aesara: 2.5.1\n", + "pytensor: 2.5.1\n", "aeppl : 0.0.27\n", "\n", "seaborn : 0.11.2\n", @@ -706,7 +706,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl" + "%watermark -n -u -v -iv -w -p pytensor,aeppl" ] } ], diff --git a/examples/survival_analysis/censored_data.myst.md b/examples/survival_analysis/censored_data.myst.md index 930f52895..6de390828 100644 --- a/examples/survival_analysis/censored_data.myst.md +++ b/examples/survival_analysis/censored_data.myst.md @@ -239,5 +239,5 @@ As we can see, both censored models appear to capture the mean and variance of t ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl +%watermark -n -u -v -iv -w -p pytensor,aeppl ``` diff --git a/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.ipynb b/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.ipynb index 128ce436e..6e087269b 100644 --- a/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.ipynb +++ b/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.ipynb @@ -1196,7 +1196,7 @@ "Python version : 3.9.12\n", "IPython version : 8.3.0\n", "\n", - "aesara: 2.6.2\n", + "pytensor: 2.6.2\n", "aeppl : 0.0.28\n", "xarray: 2022.3.0\n", "\n", @@ -1213,7 +1213,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.myst.md b/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.myst.md index b0ff85919..c45ac02e9 100644 --- a/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.myst.md +++ b/examples/time_series/Air_passengers-Prophet_with_Bayesian_workflow.myst.md @@ -368,7 +368,7 @@ For reference, you might also want to check out: ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/time_series/Forecasting_with_structural_timeseries.ipynb b/examples/time_series/Forecasting_with_structural_timeseries.ipynb index 21ae10808..90ad2bef3 100644 --- a/examples/time_series/Forecasting_with_structural_timeseries.ipynb +++ b/examples/time_series/Forecasting_with_structural_timeseries.ipynb @@ -8360,7 +8360,7 @@ "Python version : 3.9.0\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.8.7\n", + "pytensor: 2.8.7\n", "aeppl : 0.0.38\n", "xarray: 2022.10.0\n", "\n", @@ -8379,7 +8379,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/time_series/Forecasting_with_structural_timeseries.myst.md b/examples/time_series/Forecasting_with_structural_timeseries.myst.md index b2e9e42b0..9013f3b7d 100644 --- a/examples/time_series/Forecasting_with_structural_timeseries.myst.md +++ b/examples/time_series/Forecasting_with_structural_timeseries.myst.md @@ -755,7 +755,7 @@ Adapted from Nathaniel Forde's [Examined Algorithms Blog](https://nathanielf.git ```{code-cell} ipython3 %load_ext 
watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/time_series/bayesian_var_model.ipynb b/examples/time_series/bayesian_var_model.ipynb index 5c91362cb..77b43dc98 100644 --- a/examples/time_series/bayesian_var_model.ipynb +++ b/examples/time_series/bayesian_var_model.ipynb @@ -824,10 +824,10 @@ "INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [lag_coefs, alpha, noise_chol]\n", "INFO:pymc:NUTS: [lag_coefs, alpha, noise_chol]\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] }, { @@ -1825,10 +1825,10 @@ "INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [lag_coefs, alpha, noise_chol]\n", "INFO:pymc:NUTS: [lag_coefs, alpha, noise_chol]\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n", + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] }, { @@ -16823,7 +16823,7 @@ "Python version : 3.9.0\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.8.7\n", + "pytensor: 2.8.7\n", "aeppl : 0.0.38\n", "xarray: 2022.10.0\n", "\n", @@ -16843,7 +16843,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,aeppl,xarray" + "%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray" ] }, { diff --git a/examples/time_series/bayesian_var_model.myst.md b/examples/time_series/bayesian_var_model.myst.md index 5db56d28f..928412f0f 100644 --- a/examples/time_series/bayesian_var_model.myst.md +++ b/examples/time_series/bayesian_var_model.myst.md @@ -761,7 +761,7 @@ In the next post in this series we will spend some time digging into the implied ```{code-cell} ipython3 %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,aeppl,xarray ``` :::{include} ../page_footer.md diff --git a/examples/variational_inference/bayesian_neural_network_advi.ipynb b/examples/variational_inference/bayesian_neural_network_advi.ipynb index 621eae3cf..d62234a36 100644 --- a/examples/variational_inference/bayesian_neural_network_advi.ipynb +++ b/examples/variational_inference/bayesian_neural_network_advi.ipynb @@ -76,12 +76,12 @@ 
"metadata": {}, "outputs": [], "source": [ - "import aesara\n", - "import aesara.tensor as at\n", "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pymc as pm\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "import seaborn as sns\n", "\n", "from sklearn.datasets import make_moons\n", @@ -96,7 +96,7 @@ "outputs": [], "source": [ "%config InlineBackend.figure_format = 'retina'\n", - "floatX = aesara.config.floatX\n", + "floatX = pytensor.config.floatX\n", "RANDOM_SEED = 9927\n", "rng = np.random.default_rng(RANDOM_SEED)\n", "az.style.use(\"arviz-darkgrid\")" @@ -869,7 +869,7 @@ "xarray: 2022.3.0\n", "\n", "arviz : 0.12.0\n", - "aesara : 2.6.2\n", + "pytensor : 2.6.2\n", "matplotlib: 3.5.1\n", "sys : 3.9.10 | packaged by conda-forge | (main, Feb 1 2022, 21:24:11) \n", "[GCC 9.4.0]\n", diff --git a/examples/variational_inference/bayesian_neural_network_advi.myst.md b/examples/variational_inference/bayesian_neural_network_advi.myst.md index 678586c8a..4cd0f8881 100644 --- a/examples/variational_inference/bayesian_neural_network_advi.myst.md +++ b/examples/variational_inference/bayesian_neural_network_advi.myst.md @@ -64,12 +64,12 @@ While this would allow Probabilistic Programming to be applied to a much wider s First, lets generate some toy data -- a simple binary classification problem that's not linearly separable. ```{code-cell} ipython3 -import aesara -import aesara.tensor as at import arviz as az import matplotlib.pyplot as plt import numpy as np import pymc as pm +import pytensor +import pytensor.tensor as at import seaborn as sns from sklearn.datasets import make_moons @@ -79,7 +79,7 @@ from sklearn.preprocessing import scale ```{code-cell} ipython3 %config InlineBackend.figure_format = 'retina' -floatX = aesara.config.floatX +floatX = pytensor.config.floatX RANDOM_SEED = 9927 rng = np.random.default_rng(RANDOM_SEED) az.style.use("arviz-darkgrid") diff --git a/examples/variational_inference/pathfinder.ipynb b/examples/variational_inference/pathfinder.ipynb index b83169ec7..009bbd4ad 100644 --- a/examples/variational_inference/pathfinder.ipynb +++ b/examples/variational_inference/pathfinder.ipynb @@ -23,7 +23,7 @@ "source": [ "Pathfinder {cite:p}`zhang2021pathfinder` is a variational inference algorithm that produces samples from the posterior of a Bayesian model. It compares favorably to the widely used ADVI algorithm. On large problems, it should scale better than most MCMC algorithms, including dynamic HMC (i.e. NUTS), at the cost of a more biased estimate of the posterior. For details on the algorithm, see the [arxiv preprint](https://arxiv.org/abs/2108.03782).\n", "\n", - "This algorithm is [implemented](https://github.com/blackjax-devs/blackjax/pull/194) in [BlackJAX](https://github.com/blackjax-devs/blackjax), a library of inference algorithms for [JAX](https://github.com/google/jax). Through PyMC's JAX-backend (through [aesara](https://github.com/aesara-devs/aesara)) we can run BlackJAX's pathfinder on any PyMC model with some simple wrapper code.\n", + "This algorithm is [implemented](https://github.com/blackjax-devs/blackjax/pull/194) in [BlackJAX](https://github.com/blackjax-devs/blackjax), a library of inference algorithms for [JAX](https://github.com/google/jax). 
Through PyMC's JAX-backend (through [pytensor](https://github.com/pymc-devs/pytensor)) we can run BlackJAX's pathfinder on any PyMC model with some simple wrapper code.\n", "\n", "This wrapper code is implemented in [pymcx](https://github.com/pymc-devs/pymcx/). This tutorial shows how to run Pathfinder on your PyMC model.\n", "\n", @@ -195,7 +195,7 @@ "Python version : 3.10.6\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.8.2\n", + "pytensor: 2.8.2\n", "xarray: 2022.6.0\n", "\n", "pymc_experimental: 0.0.1\n", @@ -210,7 +210,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara,xarray" + "%watermark -n -u -v -iv -w -p pytensor,xarray" ] }, { diff --git a/examples/variational_inference/pathfinder.myst.md b/examples/variational_inference/pathfinder.myst.md index fe7195fac..b12788bd3 100644 --- a/examples/variational_inference/pathfinder.myst.md +++ b/examples/variational_inference/pathfinder.myst.md @@ -24,7 +24,7 @@ kernelspec: Pathfinder {cite:p}`zhang2021pathfinder` is a variational inference algorithm that produces samples from the posterior of a Bayesian model. It compares favorably to the widely used ADVI algorithm. On large problems, it should scale better than most MCMC algorithms, including dynamic HMC (i.e. NUTS), at the cost of a more biased estimate of the posterior. For details on the algorithm, see the [arxiv preprint](https://arxiv.org/abs/2108.03782).

-This algorithm is [implemented](https://github.com/blackjax-devs/blackjax/pull/194) in [BlackJAX](https://github.com/blackjax-devs/blackjax), a library of inference algorithms for [JAX](https://github.com/google/jax). Through PyMC's JAX-backend (through [aesara](https://github.com/aesara-devs/aesara)) we can run BlackJAX's pathfinder on any PyMC model with some simple wrapper code.
+This algorithm is [implemented](https://github.com/blackjax-devs/blackjax/pull/194) in [BlackJAX](https://github.com/blackjax-devs/blackjax), a library of inference algorithms for [JAX](https://github.com/google/jax). Through PyMC's JAX-backend (through [pytensor](https://github.com/pymc-devs/pytensor)) we can run BlackJAX's pathfinder on any PyMC model with some simple wrapper code.

 This wrapper code is implemented in [pymcx](https://github.com/pymc-devs/pymcx/). This tutorial shows how to run Pathfinder on your PyMC model.

@@ -89,7 +89,7 @@ az.plot_trace(idata);

```{code-cell} ipython3
%load_ext watermark
-%watermark -n -u -v -iv -w -p aesara,xarray
+%watermark -n -u -v -iv -w -p pytensor,xarray
```

:::{include} ../page_footer.md
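To make the "simple wrapper code" point concrete, a minimal sketch of running Pathfinder on a toy model is shown below. It assumes `pymc_experimental` exposes a `fit(method="pathfinder")` entry point, as the surrounding text suggests, and that JAX and BlackJAX are installed; the model and data are purely illustrative.

```python
import numpy as np
import pymc as pm
import pymc_experimental as pmx  # assumed to provide the Pathfinder wrapper

rng = np.random.default_rng(0)
y = rng.normal(loc=1.0, scale=2.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    # Pathfinder runs through BlackJAX on the JAX backend rather than PyMC's own samplers.
    idata = pmx.fit(method="pathfinder")
```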