diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 3548518785..1de687e1ba 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -97,6 +97,7 @@ All of the above apply to: This includes API changes we did not warn about since at least `3.11.0` (2021-01). - Setting initial values through `pm.Distribution(testval=...)` is now `pm.Distribution(initval=...)`. +- Alternative `sd` keyword argument has been removed from all distributions. `sigma` should be used instead (see [#5583](https://github.com/pymc-devs/pymc/pull/5583)). ### New features diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index 55b13d110b..ea3c464c6d 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -32,17 +32,17 @@ def glm_hierarchical_model(random_seed=123): n_counties = len(data.county.unique()) with pm.Model() as model: - mu_a = pm.Normal("mu_a", mu=0.0, sd=100**2) + mu_a = pm.Normal("mu_a", mu=0.0, sigma=100**2) sigma_a = pm.HalfCauchy("sigma_a", 5) - mu_b = pm.Normal("mu_b", mu=0.0, sd=100**2) + mu_b = pm.Normal("mu_b", mu=0.0, sigma=100**2) sigma_b = pm.HalfCauchy("sigma_b", 5) - a = pm.Normal("a", mu=0, sd=1, shape=n_counties) - b = pm.Normal("b", mu=0, sd=1, shape=n_counties) + a = pm.Normal("a", mu=0, sigma=1, shape=n_counties) + b = pm.Normal("b", mu=0, sigma=1, shape=n_counties) a = mu_a + sigma_a * a b = mu_b + sigma_b * b eps = pm.HalfCauchy("eps", 5) radon_est = a[county_idx] + b[county_idx] * data.floor.values - pm.Normal("radon_like", mu=radon_est, sd=eps, observed=data.log_radon) + pm.Normal("radon_like", mu=radon_est, sigma=eps, observed=data.log_radon) return model @@ -58,7 +58,7 @@ def mixture_model(random_seed=1234): with pm.Model() as model: w = pm.Dirichlet("w", a=np.ones_like(w_true)) - mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape) + mu = pm.Normal("mu", mu=0.0, sigma=10.0, shape=w_true.shape) enforce_order = pm.Potential( "enforce_order", at.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) @@ -88,7 +88,7 @@ class OverheadSuite: def setup(self, step): self.n_steps = 10000 with pm.Model() as self.model: - pm.Normal("x", mu=0, sd=1) + pm.Normal("x", mu=0, sigma=1) def time_overhead_sample(self, step): with self.model: @@ -133,8 +133,8 @@ def time_drug_evaluation(self): sigma_low = 1 sigma_high = 10 with pm.Model(): - group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std) - group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std) + group1_mean = pm.Normal("group1_mean", y_mean, sigma=y_std) + group2_mean = pm.Normal("group2_mean", y_mean, sigma=y_std) group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high) group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high) lambda_1 = group1_std**-2 @@ -301,7 +301,7 @@ def freefall(y, t, p): # If we know one of the parameter values, we can simply pass the value. 
ode_solution = ode_model(y0=[0], theta=[gamma, 9.8]) # The ode_solution has a shape of (n_times, n_states) - Y = pm.Normal("Y", mu=ode_solution, sd=sigma, observed=y) + Y = pm.Normal("Y", mu=ode_solution, sigma=sigma, observed=y) t0 = time.time() idata = pm.sample(500, tune=1000, chains=2, cores=2, random_seed=0) diff --git a/docs/source/PyMC_and_Aesara.rst b/docs/source/PyMC_and_Aesara.rst index 66bb3e3e69..7bf1b5d63a 100644 --- a/docs/source/PyMC_and_Aesara.rst +++ b/docs/source/PyMC_and_Aesara.rst @@ -188,8 +188,8 @@ example:: with pm.Model() as model: mu = pm.Normal('mu', 0, 1) - sd = pm.HalfNormal('sd', 1) - y = pm.Normal('y', mu=mu, sigma=sd, observed=data) + sigma = pm.HalfNormal('sigma', 1) + y = pm.Normal('y', mu=mu, sigma=sigma, observed=data) is roughly equivalent to this:: @@ -203,10 +203,10 @@ is roughly equivalent to this:: model.add_free_variable(sd_log__) model.add_logp_term(corrected_logp_half_normal(sd_log__)) - sd = at.exp(sd_log__) - model.add_deterministic_variable(sd) + sigma = at.exp(sd_log__) + model.add_deterministic_variable(sigma) - model.add_logp_term(pm.Normal.dist(mu, sd).logp(data)) + model.add_logp_term(pm.Normal.dist(mu, sigma).logp(data)) The return values of the variable constructors are subclasses of Aesara variables, so when we define a variable we can use any @@ -217,5 +217,5 @@ Aesara operation on them:: # beta is a at.dvector beta = pm.Normal('beta', 0, 1, shape=len(design_matrix)) predict = at.dot(design_matrix, beta) - sd = pm.HalfCauchy('sd', beta=2.5) - pm.Normal('y', mu=predict, sigma=sd, observed=data) + sigma = pm.HalfCauchy('sigma', beta=2.5) + pm.Normal('y', mu=predict, sigma=sigma, observed=data) diff --git a/docs/source/contributing/developer_guide.rst b/docs/source/contributing/developer_guide.rst index 100bfbab0c..c1b7c90228 100644 --- a/docs/source/contributing/developer_guide.rst +++ b/docs/source/contributing/developer_guide.rst @@ -888,8 +888,8 @@ others. 
The challenge and some summary of the solution could be found in Luciano with pm.Model() as m: mu = pm.Normal('mu', 0., 1., shape=(5, 1)) - sd = pm.HalfNormal('sd', 5., shape=(1, 10)) - pm.Normal('x', mu=mu, sigma=sd, observed=np.random.randn(2, 5, 10)) + sigma = pm.HalfNormal('sigma', 5., shape=(1, 10)) + pm.Normal('x', mu=mu, sigma=sigma, observed=np.random.randn(2, 5, 10)) trace = pm.sample_prior_predictive(100) trace['x'].shape # ==> should be (100, 2, 5, 10)
diff --git a/docs/source/learn/examples/dimensionality.ipynb b/docs/source/learn/examples/dimensionality.ipynb index 363d8e8a9b..333d3360ce 100644 --- a/docs/source/learn/examples/dimensionality.ipynb +++ b/docs/source/learn/examples/dimensionality.ipynb @@ -159,7 +159,7 @@ } ], "source": [ - "random_sample = pm.Normal.dist(mu=[1,10,100], sd=.0001).eval()\n", + "random_sample = pm.Normal.dist(mu=[1,10,100], sigma=.0001).eval()\n", "random_sample, random_sample.shape" ] },
@@ -236,68 +236,7 @@ [graphviz model-graph SVG output of this cell: clusters 3, 4 and 5 with nodes "scalar", "vector (implied)", "vector (from shape)" and "vector (from size)", each ~ Normal; the hunk re-encodes this SVG output from a list of strings to a single string]
@@ -332,61 +271,7 @@ [graphviz model-graph SVG output of this cell: clusters "B (2)" and "Dim_A (4)" with nodes "red", "one" and "two", each ~ Normal, plus an edge one -> two; same re-encoding of the SVG output]
@@ -509,73 +394,7 @@ [graphviz model-graph SVG output of this cell: clusters "3", "repeats (3) x implied (2)", "repeats (3) x None (2)" and "year (3) x None (2)" with nodes "implied", "with size", "with shape" and "with coords", each ~ MvNormal; same re-encoding of the SVG output]
diff --git a/docs/source/learn/examples/posterior_predictive.ipynb b/docs/source/learn/examples/posterior_predictive.ipynb index 4faee5878a..4b9a820285 100644 --- a/docs/source/learn/examples/posterior_predictive.ipynb +++ b/docs/source/learn/examples/posterior_predictive.ipynb @@ -151,9 +151,9 @@ " b = pm.Normal(\"b\", 0.0, 10.0)\n", "\n", " mu = a + b * predictor_scaled\n", - " sd = pm.Exponential(\"sd\", 1.0)\n", + " sigma = pm.Exponential(\"sigma\", 1.0)\n", "\n", - " pm.Normal(\"obs\", mu=mu, sigma=sd, observed=outcome_scaled)\n", + " pm.Normal(\"obs\", mu=mu, sigma=sigma, observed=outcome_scaled)\n", " idata = pm.sample_prior_predictive(samples=50)" ] }, @@ -212,9 +212,9 @@ " b = pm.Normal(\"b\", 0.0, 1.0)\n", "\n", " mu = a + b * predictor_scaled\n", - " sd = pm.Exponential(\"sd\", 1.0)\n", + " sigma = pm.Exponential(\"sigma\", 1.0)\n", "\n", - " pm.Normal(\"obs\", mu=mu, sigma=sd, observed=outcome_scaled)\n", + " pm.Normal(\"obs\", mu=mu, sigma=sigma, observed=outcome_scaled)\n", " idata = pm.sample_prior_predictive(samples=50)" ] }, @@ -328,7 +328,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Everything ran smoothly, but it's often difficult to understand what the parameters' values mean when analyzing a trace plot or table summary -- even more so here, as the parameters live in the standardized space. A useful thing to understand your models is... you guessed it: posterior predictive checks! We'll use PyMC's dedicated function to sample data from the posterior. This function will randomly draw 4000 samples of parameters from the trace. Then, for each sample, it will draw 100 random numbers from a normal distribution specified by the values of `mu` and `sd` in that sample:" + "Everything ran smoothly, but it's often difficult to understand what the parameters' values mean when analyzing a trace plot or table summary -- even more so here, as the parameters live in the standardized space. A useful thing to understand your models is... you guessed it: posterior predictive checks! We'll use PyMC's dedicated function to sample data from the posterior. This function will randomly draw 4000 samples of parameters from the trace. Then, for each sample, it will draw 100 random numbers from a normal distribution specified by the values of `mu` and `sigma` in that sample:" ] }, { diff --git a/pymc/data.py b/pymc/data.py index d3624e88fc..3d74d659df 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -205,8 +205,8 @@ class Minibatch(TensorVariable): >>> with pm.Model() as model: ... mu = pm.Flat('mu') - ... sd = pm.HalfNormal('sd') - ... lik = pm.Normal('lik', mu, sd, observed=x, total_size=(100, 100)) + ... sigma = pm.HalfNormal('sigma') + ...
lik = pm.Normal('lik', mu, sigma, observed=x, total_size=(100, 100)) Then you can perform regular Variational Inference out of the box diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 03c4a5a2e3..9090aa1856 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -546,13 +546,10 @@ class Normal(Continuous): rv_op = normal @classmethod - def dist(cls, mu=0, sigma=None, tau=None, sd=None, no_assert=False, **kwargs): - if sd is not None: - sigma = sd + def dist(cls, mu=0, sigma=None, tau=None, no_assert=False, **kwargs): tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) sigma = at.as_tensor_variable(sigma) - # sd = sigma # tau = at.as_tensor_variable(tau) # mean = median = mode = mu = at.as_tensor_variable(floatX(mu)) # variance = 1.0 / self.tau @@ -710,13 +707,11 @@ def dist( mu: Optional[DIST_PARAMETER_TYPES] = None, sigma: Optional[DIST_PARAMETER_TYPES] = None, tau: Optional[DIST_PARAMETER_TYPES] = None, - sd: Optional[DIST_PARAMETER_TYPES] = None, lower: Optional[DIST_PARAMETER_TYPES] = None, upper: Optional[DIST_PARAMETER_TYPES] = None, *args, **kwargs, ) -> RandomVariable: - sigma = sd if sd is not None else sigma tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) sigma = at.as_tensor_variable(sigma) tau = at.as_tensor_variable(tau) @@ -866,10 +861,7 @@ class HalfNormal(PositiveContinuous): rv_op = halfnormal @classmethod - def dist(cls, sigma=None, tau=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, sigma=None, tau=None, *args, **kwargs): tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) assert_negative_support(tau, "tau", "HalfNormal") @@ -1226,10 +1218,7 @@ class Beta(UnitContinuous): rv_op = aesara.tensor.random.beta @classmethod - def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, alpha=None, beta=None, mu=None, sigma=None, *args, **kwargs): alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma) alpha = at.as_tensor_variable(floatX(alpha)) beta = at.as_tensor_variable(floatX(beta)) @@ -1785,10 +1774,7 @@ class LogNormal(PositiveContinuous): rv_op = lognormal @classmethod - def dist(cls, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, mu=0, sigma=None, tau=None, *args, **kwargs): tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) mu = at.as_tensor_variable(floatX(mu)) @@ -1914,9 +1900,7 @@ class StudentT(Continuous): rv_op = studentt @classmethod - def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd + def dist(cls, nu, mu=0, lam=None, sigma=None, *args, **kwargs): nu = at.as_tensor_variable(floatX(nu)) lam, sigma = get_tau_sigma(tau=lam, sigma=sigma) sigma = at.as_tensor_variable(sigma) @@ -2306,10 +2290,7 @@ class Gamma(PositiveContinuous): rv_op = gamma @classmethod - def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, no_assert=False, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, alpha=None, beta=None, mu=None, sigma=None, no_assert=False, **kwargs): alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma) alpha = at.as_tensor_variable(floatX(alpha)) beta = at.as_tensor_variable(floatX(beta)) @@ -2426,10 +2407,7 @@ class InverseGamma(PositiveContinuous): rv_op = invgamma @classmethod - def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, alpha=None, beta=None, mu=None, 
sigma=None, *args, **kwargs): alpha, beta = cls._get_alpha_beta(alpha, beta, mu, sigma) alpha = at.as_tensor_variable(floatX(alpha)) beta = at.as_tensor_variable(floatX(beta)) @@ -2750,11 +2728,7 @@ class HalfStudentT(PositiveContinuous): rv_op = halfstudentt @classmethod - def dist(cls, nu=1, sigma=None, lam=None, sd=None, *args, **kwargs): - - if sd is not None: - sigma = sd - + def dist(cls, nu=1, sigma=None, lam=None, *args, **kwargs): nu = at.as_tensor_variable(floatX(nu)) lam, sigma = get_tau_sigma(lam, sigma) sigma = at.as_tensor_variable(sigma) @@ -2886,11 +2860,7 @@ class ExGaussian(Continuous): rv_op = exgaussian @classmethod - def dist(cls, mu=0.0, sigma=None, nu=None, sd=None, *args, **kwargs): - - if sd is not None: - sigma = sd - + def dist(cls, mu=0.0, sigma=None, nu=None, *args, **kwargs): mu = at.as_tensor_variable(floatX(mu)) sigma = at.as_tensor_variable(floatX(sigma)) nu = at.as_tensor_variable(floatX(nu)) @@ -3118,10 +3088,7 @@ class SkewNormal(Continuous): rv_op = skewnormal @classmethod - def dist(cls, alpha=1, mu=0.0, sigma=None, tau=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, alpha=1, mu=0.0, sigma=None, tau=None, *args, **kwargs): tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) alpha = at.as_tensor_variable(floatX(alpha)) mu = at.as_tensor_variable(floatX(mu)) @@ -3445,10 +3412,7 @@ class Rice(PositiveContinuous): rv_op = rice @classmethod - def dist(cls, nu=None, sigma=None, b=None, sd=None, *args, **kwargs): - if sd is not None: - sigma = sd - + def dist(cls, nu=None, sigma=None, b=None, *args, **kwargs): nu, b, sigma = cls.get_nu_b(nu, b, sigma) b = at.as_tensor_variable(floatX(b)) sigma = at.as_tensor_variable(floatX(sigma)) @@ -3657,12 +3621,10 @@ class LogitNormal(UnitContinuous): rv_op = logit_normal @classmethod - def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs): - if sd is not None: - sigma = sd + def dist(cls, mu=0, sigma=None, tau=None, **kwargs): mu = at.as_tensor_variable(floatX(mu)) tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - sigma = sd = at.as_tensor_variable(sigma) + sigma = at.as_tensor_variable(sigma) tau = at.as_tensor_variable(tau) assert_negative_support(sigma, "sigma", "LogitNormal") assert_negative_support(tau, "tau", "LogitNormal") diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index ad07fee965..c5df649dd9 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -514,17 +514,13 @@ class NormalMixture: y = pm.NormalMixture("y", w=weights, mu=μ, sigma=σ, observed=data) """ - def __new__(cls, name, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs): - if sd is not None: - sigma = sd + def __new__(cls, name, w, mu, sigma=None, tau=None, comp_shape=(), **kwargs): _, sigma = get_tau_sigma(tau=tau, sigma=sigma) return Mixture(name, w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs) @classmethod - def dist(cls, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs): - if sd is not None: - sigma = sd + def dist(cls, w, mu, sigma=None, tau=None, comp_shape=(), **kwargs): _, sigma = get_tau_sigma(tau=tau, sigma=sigma) return Mixture.dist(w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 6691ad2e93..c059984958 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -108,15 +108,10 @@ class AR(distribution.Continuous): distribution for initial values (Defaults to Flat()) """ - def 
__init__( - self, rho, sigma=None, tau=None, constant=False, init=None, sd=None, *args, **kwargs - ): + def __init__(self, rho, sigma=None, tau=None, constant=False, init=None, *args, **kwargs): super().__init__(*args, **kwargs) - if sd is not None: - sigma = sd - tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = at.as_tensor_variable(sigma) + self.sigma = at.as_tensor_variable(sigma) self.tau = at.as_tensor_variable(tau) self.mean = at.as_tensor_variable(0.0) @@ -201,17 +196,15 @@ class GaussianRandomWalk(distribution.Continuous): distribution for initial value (Defaults to Flat()) """ - def __init__(self, tau=None, init=None, sigma=None, mu=0.0, sd=None, *args, **kwargs): + def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs): kwargs.setdefault("shape", 1) super().__init__(*args, **kwargs) if sum(self.shape) == 0: raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!") - if sd is not None: - sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) self.tau = at.as_tensor_variable(tau) sigma = at.as_tensor_variable(sigma) - self.sigma = self.sd = sigma + self.sigma = sigma self.mu = at.as_tensor_variable(mu) self.init = init or Flat.dist() self.mean = at.as_tensor_variable(0.0) @@ -400,8 +393,8 @@ def logp(self, x): xt = x[:-1] f, g = self.sde_fn(x[:-1], *self.sde_pars) mu = xt + self.dt * f - sd = at.sqrt(self.dt) * g - return at.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:])) + sigma = at.sqrt(self.dt) * g + return at.sum(Normal.dist(mu=mu, sigma=sigma).logp(x[1:])) def _distr_parameters_for_repr(self): return ["dt"] diff --git a/pymc/model.py b/pymc/model.py index ab695f6c96..aad3abd8f2 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -482,7 +482,7 @@ def __init__(self, mean=0, sigma=1, name=''): Normal('v2', mu=mean, sigma=sd) # something more complex is allowed, too - half_cauchy = HalfCauchy('sd', beta=10, initval=1.) + half_cauchy = HalfCauchy('sigma', beta=10, initval=1.) 
Normal('v3', mu=mean, sigma=half_cauchy) # Deterministic variables can be used in usual way diff --git a/pymc/tests/models.py b/pymc/tests/models.py index cac49ba3aa..463940d239 100644 --- a/pymc/tests/models.py +++ b/pymc/tests/models.py @@ -212,7 +212,7 @@ def beta_bernoulli(n=2): def simple_normal(bounded_prior=False): """Simple normal for testing MLE / MAP; probes issue #2482.""" x0 = 10.0 - sd = 1.0 + sigma = 1.0 a, b = (9, 12) # bounds for uniform RV, need non-symmetric to reproduce issue with pm.Model(rng_seeder=2482) as model: @@ -220,6 +220,6 @@ def simple_normal(bounded_prior=False): mu_i = pm.Uniform("mu_i", a, b) else: mu_i = pm.Flat("mu_i") - pm.Normal("X_obs", mu=mu_i, sigma=sd, observed=x0) + pm.Normal("X_obs", mu=mu_i, sigma=sigma, observed=x0) return model.compute_initial_point(), model, None diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index ce140b6b5b..c0c09d736a 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -837,7 +837,7 @@ class TestNormalTau(BaseTestDistributionRandom): class TestNormalSd(BaseTestDistributionRandom): pymc_dist = pm.Normal - pymc_dist_params = {"mu": 1.0, "sd": 5.0} + pymc_dist_params = {"mu": 1.0, "sigma": 5.0} expected_rv_op_params = {"mu": 1.0, "sigma": 5.0} checks_to_run = ["check_pymc_params_match_rv_op"] @@ -871,7 +871,7 @@ class TestHalfNormalTau(BaseTestDistributionRandom): class TestHalfNormalSd(BaseTestDistributionRandom): pymc_dist = pm.Normal - pymc_dist_params = {"sd": 5.0} + pymc_dist_params = {"sigma": 5.0} expected_rv_op_params = {"mu": 0.0, "sigma": 5.0} checks_to_run = ["check_pymc_params_match_rv_op"] @@ -1457,7 +1457,7 @@ class TestLognormalTau(BaseTestDistributionRandom): class TestLognormalSd(BaseTestDistributionRandom): pymc_dist = pm.Lognormal - pymc_dist_params = {"mu": 1.0, "sd": 5.0} + pymc_dist_params = {"mu": 1.0, "sigma": 5.0} expected_rv_op_params = {"mu": 1.0, "sigma": 5.0} checks_to_run = ["check_pymc_params_match_rv_op"] diff --git a/pymc/tests/test_idata_conversion.py b/pymc/tests/test_idata_conversion.py index 00f1575b86..97d6535ab3 100644 --- a/pymc/tests/test_idata_conversion.py +++ b/pymc/tests/test_idata_conversion.py @@ -52,14 +52,14 @@ def __init__(self, model, trace): @pytest.fixture(scope="class") def data(self, eight_schools_params, draws, chains): with pm.Model() as model: - mu = pm.Normal("mu", mu=0, sd=5) + mu = pm.Normal("mu", mu=0, sigma=5) tau = pm.HalfCauchy("tau", beta=5) - eta = pm.Normal("eta", mu=0, sd=1, size=eight_schools_params["J"]) + eta = pm.Normal("eta", mu=0, sigma=1, size=eight_schools_params["J"]) theta = pm.Deterministic("theta", mu + tau * eta) pm.Normal( "obs", mu=theta, - sd=eight_schools_params["sigma"], + sigma=eight_schools_params["sigma"], observed=eight_schools_params["y"], ) trace = pm.sample(draws, chains=chains, return_inferencedata=False) @@ -249,15 +249,17 @@ def test_autodetect_coords_from_model(self, use_context): coords = {"date": df_data.index, "city": df_data.columns} with pm.Model(coords=coords) as model: - europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sd=3.0) - city_offset = pm.Normal("city_offset", mu=0.0, sd=3.0, dims="city") + europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sigma=3.0) + city_offset = pm.Normal("city_offset", mu=0.0, sigma=3.0, dims="city") city_temperature = pm.Deterministic( "city_temperature", europe_mean + city_offset, dims="city" ) data_dims = ("date", "city") data = pm.ConstantData("data", df_data, dims=data_dims) - _ = 
pm.Normal("likelihood", mu=city_temperature, sd=0.5, observed=data, dims=data_dims) + _ = pm.Normal( + "likelihood", mu=city_temperature, sigma=0.5, observed=data, dims=data_dims + ) trace = pm.sample( return_inferencedata=False, diff --git a/pymc/tests/test_initial_point.py b/pymc/tests/test_initial_point.py index c252594906..1a7560618e 100644 --- a/pymc/tests/test_initial_point.py +++ b/pymc/tests/test_initial_point.py @@ -95,12 +95,12 @@ def test_dependent_initvals(self): def test_nested_initvals(self): # See issue #5168 with pm.Model() as pmodel: - one = pm.LogNormal("one", mu=np.log(1), sd=1e-5, initval="prior") - two = pm.Lognormal("two", mu=np.log(one * 2), sd=1e-5, initval="prior") - three = pm.LogNormal("three", mu=np.log(two * 2), sd=1e-5, initval="prior") - four = pm.LogNormal("four", mu=np.log(three * 2), sd=1e-5, initval="prior") - five = pm.LogNormal("five", mu=np.log(four * 2), sd=1e-5, initval="prior") - six = pm.LogNormal("six", mu=np.log(five * 2), sd=1e-5, initval="prior") + one = pm.LogNormal("one", mu=np.log(1), sigma=1e-5, initval="prior") + two = pm.Lognormal("two", mu=np.log(one * 2), sigma=1e-5, initval="prior") + three = pm.LogNormal("three", mu=np.log(two * 2), sigma=1e-5, initval="prior") + four = pm.LogNormal("four", mu=np.log(three * 2), sigma=1e-5, initval="prior") + five = pm.LogNormal("five", mu=np.log(four * 2), sigma=1e-5, initval="prior") + six = pm.LogNormal("six", mu=np.log(five * 2), sigma=1e-5, initval="prior") ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=True)(0).values()) assert np.allclose(np.exp(ip_vals), [1, 2, 4, 8, 16, 32], rtol=1e-3) diff --git a/pymc/tests/test_mixture.py b/pymc/tests/test_mixture.py index 3c061d5ef4..825207d0bb 100644 --- a/pymc/tests/test_mixture.py +++ b/pymc/tests/test_mixture.py @@ -58,15 +58,15 @@ from pymc.tests.test_distributions_random import pymc_random -def generate_normal_mixture_data(w, mu, sd, size=1000): +def generate_normal_mixture_data(w, mu, sigma, size=1000): component = np.random.choice(w.size, size=size, p=w) - mu, sd = np.broadcast_arrays(mu, sd) + mu, sigma = np.broadcast_arrays(mu, sigma) out_size = to_tuple(size) + mu.shape[:-1] mu_ = np.array([mu[..., comp] for comp in component.ravel()]) - sd_ = np.array([sd[..., comp] for comp in component.ravel()]) + sigma_ = np.array([sigma[..., comp] for comp in component.ravel()]) mu_ = np.reshape(mu_, out_size) - sd_ = np.reshape(sd_, out_size) - x = np.random.normal(mu_, sd_, size=out_size) + sigma_ = np.reshape(sigma_, out_size) + x = np.random.normal(mu_, sigma_, size=out_size) return x @@ -472,8 +472,8 @@ def test_list_poissons_sampling(self): def test_list_normals_sampling(self): norm_w = np.array([0.75, 0.25]) norm_mu = np.array([0.0, 5.0]) - norm_sd = np.ones_like(norm_mu) - norm_x = generate_normal_mixture_data(norm_w, norm_mu, norm_sd, size=1000) + norm_sigma = np.ones_like(norm_mu) + norm_x = generate_normal_mixture_data(norm_w, norm_mu, norm_sigma, size=1000) with Model() as model: w = Dirichlet("w", floatX(np.ones_like(norm_w)), shape=norm_w.size) @@ -715,8 +715,8 @@ class TestNormalMixture(SeededTest): def test_normal_mixture_sampling(self): norm_w = np.array([0.75, 0.25]) norm_mu = np.array([0.0, 5.0]) - norm_sd = np.ones_like(norm_mu) - norm_x = generate_normal_mixture_data(norm_w, norm_mu, norm_sd, size=1000) + norm_sigma = np.ones_like(norm_mu) + norm_x = generate_normal_mixture_data(norm_w, norm_mu, norm_sigma, size=1000) with Model() as model: w = Dirichlet("w", floatX(np.ones_like(norm_w)), shape=norm_w.size) @@ 
-746,7 +746,7 @@ def test_normal_mixture_nd(self, nd, ncomp): test_mus = np.random.randn(*comp_shape) test_taus = np.random.gamma(1, 1, size=comp_shape) observed = generate_normal_mixture_data( - w=np.ones(ncomp) / ncomp, mu=test_mus, sd=1 / np.sqrt(test_taus), size=10 + w=np.ones(ncomp) / ncomp, mu=test_mus, sigma=1 / np.sqrt(test_taus), size=10 ) with Model() as model0: diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index 01d5f0c0f8..7f69566e99 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -58,7 +58,7 @@ def __init__(self, mean=0, sigma=1, name="", model=None): super().__init__(name, model) self.register_rv(Normal.dist(mu=mean, sigma=sigma), "v1") Normal("v2", mu=mean, sigma=sigma) - Normal("v3", mu=mean, sigma=Normal("sd", mu=10, sigma=1, initval=1.0)) + Normal("v3", mu=mean, sigma=Normal("sigma", mu=10, sigma=1, initval=1.0)) Deterministic("v3_sq", self.v3**2) Potential("p1", at.constant(1)) @@ -626,7 +626,7 @@ def test_set_initval(): with pm.Model(rng_seeder=rng) as model: eta = pm.Uniform("eta", 1.0, 2.0, size=(1, 1)) - mu = pm.Normal("mu", sd=eta, initval=[[100]]) + mu = pm.Normal("mu", sigma=eta, initval=[[100]]) alpha = pm.HalfNormal("alpha", initval=100) value = pm.NegativeBinomial("value", mu=mu, alpha=alpha) diff --git a/pymc/tests/test_model_graph.py b/pymc/tests/test_model_graph.py index 091a4ad316..14e3e0ebec 100644 --- a/pymc/tests/test_model_graph.py +++ b/pymc/tests/test_model_graph.py @@ -102,7 +102,7 @@ def model_with_dims(): with pm.Model(coords={"city": ["Aachen", "Maastricht", "London", "Bergheim"]}) as pmodel: economics = pm.Uniform("economics", lower=-1, upper=1, shape=(1,)) - population = pm.HalfNormal("population", sd=5, dims=("city")) + population = pm.HalfNormal("population", sigma=5, dims=("city")) time = pm.ConstantData("time", [2014, 2015, 2016], dims="year") diff --git a/pymc/tests/test_ode.py b/pymc/tests/test_ode.py index 647fff57e7..acb0280508 100644 --- a/pymc/tests/test_ode.py +++ b/pymc/tests/test_ode.py @@ -219,7 +219,7 @@ def system_1(y, t, p): manual_logp = norm.logpdf(x=np.ravel(yobs), loc=np.ravel(integrated_solution), scale=1).sum() with pm.Model() as model_1: forward = ode_model(theta=[alpha], y0=[y0]) - y = pm.Normal("y", mu=forward, sd=1, observed=yobs) + y = pm.Normal("y", mu=forward, sigma=1, observed=yobs) pymc_logp = model_1.compile_logp()({}) np.testing.assert_allclose(manual_logp, pymc_logp) @@ -369,7 +369,7 @@ def system(y, t, p): y0 = pm.LogNormal("y0", 0, 1) sigma = pm.HalfCauchy("sigma", 1) forward = ode_model(theta=[alpha], y0=[y0]) - y = pm.LogNormal("y", mu=pm.math.log(forward), sd=sigma, observed=yobs) + y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) with aesara.config.change_flags(mode=fast_unstable_sampling_mode): idata = pm.sample(50, tune=0, chains=1) @@ -400,7 +400,7 @@ def system(y, t, p): y0 = pm.LogNormal("y0", 0, 1) sigma = pm.HalfCauchy("sigma", 1) forward = ode_model(theta=[alpha, beta], y0=[y0]) - y = pm.LogNormal("y", mu=pm.math.log(forward), sd=sigma, observed=yobs) + y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) with aesara.config.change_flags(mode=fast_unstable_sampling_mode): idata = pm.sample(50, tune=0, chains=1) @@ -442,7 +442,7 @@ def system(y, t, p): R = pm.LogNormal("R", 1, 5, initval=1) sigma = pm.HalfCauchy("sigma", 1, shape=2, initval=[0.5, 0.5]) forward = ode_model(theta=[R], y0=[0.99, 0.01]) - y = pm.LogNormal("y", mu=pm.math.log(forward), sd=sigma, observed=yobs) + y = pm.LogNormal("y", 
mu=pm.math.log(forward), sigma=sigma, observed=yobs) with aesara.config.change_flags(mode=fast_unstable_sampling_mode): idata = pm.sample(50, tune=0, chains=1) @@ -483,7 +483,7 @@ def system(y, t, p): gamma = pm.HalfCauchy("gamma", 1, initval=1) sigma = pm.HalfCauchy("sigma", 1, shape=2, initval=[1, 1]) forward = ode_model(theta=[beta, gamma], y0=[0.99, 0.01]) - y = pm.LogNormal("y", mu=pm.math.log(forward), sd=sigma, observed=yobs) + y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) with aesara.config.change_flags(mode=fast_unstable_sampling_mode): idata = pm.sample(50, tune=0, chains=1) diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py index 9090189a7b..359d58bb8a 100644 --- a/pymc/tests/test_sampling.py +++ b/pymc/tests/test_sampling.py @@ -707,7 +707,7 @@ def test_model_shared_variable(self): x_shared = aesara.shared(x) y_shared = aesara.shared(y) with pm.Model(rng_seeder=rng) as model: - coeff = pm.Normal("x", mu=0, sd=1) + coeff = pm.Normal("x", mu=0, sigma=1) logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared)) obs = pm.Bernoulli("obs", p=logistic, observed=y_shared) @@ -1106,12 +1106,14 @@ def test_density_dist(self): obs = np.random.normal(-1, 0.1, size=10) with pm.Model(): mu = pm.Normal("mu", 0, 1) - sd = pm.HalfNormal("sd", 1e-6) + sigma = pm.HalfNormal("sigma", 1e-6) a = pm.DensityDist( "a", mu, - sd, - random=lambda mu, sd, rng=None, size=None: rng.normal(loc=mu, scale=sd, size=size), + sigma, + random=lambda mu, sigma, rng=None, size=None: rng.normal( + loc=mu, scale=sigma, size=size + ), observed=obs, ) prior = pm.sample_prior_predictive(return_inferencedata=False) @@ -1121,15 +1123,15 @@ def test_density_dist(self): def test_shape_edgecase(self): with pm.Model(): mu = pm.Normal("mu", size=5) - sd = pm.Uniform("sd", lower=2, upper=3) - x = pm.Normal("x", mu=mu, sigma=sd, size=5) + sigma = pm.Uniform("sigma", lower=2, upper=3) + x = pm.Normal("x", mu=mu, sigma=sigma, size=5) prior = pm.sample_prior_predictive(10) assert prior.prior["mu"].shape == (1, 10, 5) def test_zeroinflatedpoisson(self): with pm.Model(): mu = pm.Beta("mu", alpha=1, beta=1) - psi = pm.HalfNormal("psi", sd=1) + psi = pm.HalfNormal("psi", sigma=1) pm.ZeroInflatedPoisson("suppliers", psi=psi, mu=mu, size=20) gen_data = pm.sample_prior_predictive(samples=5000) assert gen_data.prior["mu"].shape == (1, 5000) diff --git a/pymc/tests/test_shape_handling.py b/pymc/tests/test_shape_handling.py index 5cfb481f20..1fa7e3df30 100644 --- a/pymc/tests/test_shape_handling.py +++ b/pymc/tests/test_shape_handling.py @@ -389,12 +389,12 @@ def test_observed_with_column_vector(self): # But the second shape is upcasted from an int32 vector cast64 = at.cast(at.constant([3, 1], dtype="int32"), dtype="int64") - pm.Normal("size64", mu=0, sd=1, size=size64, observed=obs) - pm.Normal("shape64", mu=0, sd=1, shape=size64, observed=obs) + pm.Normal("size64", mu=0, sigma=1, size=size64, observed=obs) + pm.Normal("shape64", mu=0, sigma=1, shape=size64, observed=obs) model.logp() - pm.Normal("size_cast64", mu=0, sd=1, size=cast64, observed=obs) - pm.Normal("shape_cast64", mu=0, sd=1, shape=cast64, observed=obs) + pm.Normal("size_cast64", mu=0, sigma=1, size=cast64, observed=obs) + pm.Normal("shape_cast64", mu=0, sigma=1, shape=cast64, observed=obs) model.logp() def test_dist_api_works(self): diff --git a/pymc/tests/test_transforms.py b/pymc/tests/test_transforms.py index 6931b3d668..08d06cf055 100644 --- a/pymc/tests/test_transforms.py +++ b/pymc/tests/test_transforms.py @@ 
-332,15 +332,15 @@ def check_vectortransform_elementwise_logp(self, model): close_to(a, b, np.abs(0.5 * (a + b) * tol)) @pytest.mark.parametrize( - "sd,size", + "sigma,size", [ (2.5, 2), (5.0, (2, 3)), (np.ones(3) * 10.0, (4, 3)), ], ) - def test_half_normal(self, sd, size): - model = self.build_model(pm.HalfNormal, {"sd": sd}, size=size, transform=tr.log) + def test_half_normal(self, sigma, size): + model = self.build_model(pm.HalfNormal, {"sigma": sigma}, size=size, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize("lam,size", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]) @@ -421,7 +421,7 @@ def test_dirichlet(self, a, size): def test_normal_ordered(self): model = self.build_model( pm.Normal, - {"mu": 0.0, "sd": 1.0}, + {"mu": 0.0, "sigma": 1.0}, size=3, initval=np.asarray([-1.0, 1.0, 4.0]), transform=tr.ordered, @@ -429,17 +429,17 @@ def test_normal_ordered(self): self.check_vectortransform_elementwise_logp(model) @pytest.mark.parametrize( - "sd,size", + "sigma,size", [ (2.5, (2,)), (np.ones(3), (4, 3)), ], ) - def test_half_normal_ordered(self, sd, size): + def test_half_normal_ordered(self, sigma, size): initval = np.sort(np.abs(np.random.randn(*size))) model = self.build_model( pm.HalfNormal, - {"sd": sd}, + {"sigma": sigma}, size=size, initval=initval, transform=tr.Chain([tr.log, tr.ordered]), diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index c0500d3b97..345469a6f2 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -94,9 +94,9 @@ def test_hashing_of_rv_tuples(): obs = np.random.normal(-1, 0.1, size=10) with pm.Model() as pmodel: mu = pm.Normal("mu", 0, 1) - sd = pm.Gamma("sd", 1, 2) + sigma = pm.Gamma("sigma", 1, 2) dd = pm.Normal("dd", observed=obs) - for freerv in [mu, sd, dd] + pmodel.free_RVs: + for freerv in [mu, sigma, dd] + pmodel.free_RVs: for structure in [ freerv, {"alpha": freerv, "omega": None},
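As a companion to the release-note entry at the top of this diff, here is a minimal before/after sketch of the `sd` to `sigma` rename. The model, data, and variable names are illustrative only and are not taken from the PR; the point is simply that every distribution keyword previously spelled `sd=` is now spelled `sigma=`, with the same meaning.

```python
import numpy as np
import pymc as pm

# toy data, purely for illustration
y = np.random.default_rng(42).normal(loc=1.0, scale=2.0, size=100)

with pm.Model():
    # previously also accepted as pm.Normal("mu", mu=0.0, sd=10.0); the sd alias is removed
    mu = pm.Normal("mu", mu=0.0, sigma=10.0)
    # likewise pm.HalfNormal("sigma", sd=5.0) becomes sigma=5.0
    sigma = pm.HalfNormal("sigma", sigma=5.0)
    # likelihoods follow the same rule: sd=... is no longer accepted
    pm.Normal("obs", mu=mu, sigma=sigma, observed=y)
    idata = pm.sample()
```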
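The `posterior_predictive.ipynb` hunk above updates the notebook's description of posterior predictive sampling to the `sigma` name. For readers who want that workflow in one place, here is a minimal sketch under stated assumptions: the data are synthetic, the variable names (`predictor_scaled`, `outcome_scaled`, `a`, `b`, `sigma`) mirror the notebook, and the draw and chain counts are chosen only so that 4000 posterior samples are produced, as in the quoted text.

```python
import numpy as np
import pymc as pm

rng = np.random.default_rng(0)
predictor_scaled = rng.normal(size=100)
outcome_scaled = 0.3 + 0.5 * predictor_scaled + rng.normal(scale=0.1, size=100)

with pm.Model():
    a = pm.Normal("a", 0.0, 1.0)
    b = pm.Normal("b", 0.0, 1.0)
    mu = a + b * predictor_scaled
    sigma = pm.Exponential("sigma", 1.0)  # the prior that used to be named "sd"
    pm.Normal("obs", mu=mu, sigma=sigma, observed=outcome_scaled)

    # 4 chains x 1000 draws = 4000 posterior samples of (a, b, sigma)
    idata = pm.sample(draws=1000, chains=4)
    # one simulated "obs" vector (100 values) per posterior sample,
    # drawn from Normal(mu, sigma) evaluated at that sample
    idata.extend(pm.sample_posterior_predictive(idata))
```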