How to scale a probability density defined on an n-ball when mapping it to a half n-sphere

change-of-variable, multivariable-calculus, probability

I apologize for linking to images, but I can't put them inline because this is my first post.

To start, consider the 1-D case of transforming a probability distribution defined on the interval $[-1, 1]$ to the unit semicircle.
You can sample uniformly from the semicircle by sampling $\theta$ uniformly from $[0, \pi]$.
Here are some samples.
Letting $x = \cos(\theta)$, we can see that the $x$s follow an arcsine distribution.
If we want to calculate $p_\theta(\theta)$ given the density for $x$, we can use $p_{\theta}(\theta) = p_{x}(\cos(\theta))\,|-\sin(\theta)|$, where the factor $|-\sin(\theta)|$ is the absolute value of $\frac{dx}{d\theta}$. Indeed, if I approximate the distribution of $x$ by binning, I can approximately recover the true uniform density for $\theta$ using this transformation.
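
As a quick sanity check of this formula, here's a short sketch that uses the closed-form arcsine density $p_x(x) = \frac{1}{\pi \sqrt{1 - x^2}}$ instead of binning:

import numpy as np

# x = cos(theta) with theta uniform on [0, pi] has the arcsine density
# p_x(x) = 1/(pi*sqrt(1 - x^2)), so p_theta(theta) = p_x(cos(theta)) |-sin(theta)|
# should come out to the constant 1/pi.
thetas = np.linspace(0.01, np.pi - 0.01, 5)
p_x = 1.0 / (np.pi * np.sqrt(1.0 - np.cos(thetas) ** 2))
p_theta = p_x * np.abs(-np.sin(thetas))
print(p_theta)  # every entry is ~1/pi ~= 0.3183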

So what I want to know is how to reproduce these steps in higher dimensions: from a 2-ball (disk) to a hemisphere of $S^2$, and from a 3-ball to half of $S^3$.
When I follow this same recipe for two dimensions using spherical coordinates, the results aren't right.
Here are some randomly sampled points from a hemisphere and here's the distribution of the $(x, y)$ coordinates on the circle.
The determinant of the Jacobian for $x$ and $y$ from $\theta$ and $\phi$ using spherical coordinates is $-\frac{\sin(2\theta)}{2}$.
However, when I use this scaling factor, I don't get the expected constant $\frac{1}{2\pi}$ (i.e., one over the surface area of the unit hemisphere) when mapping to the hemisphere, and the distribution looks pretty wonky.
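
For what it's worth, here's a finite-difference check of that determinant under the convention $x = \sin(\theta)\cos(\phi)$, $y = \sin(\theta)\sin(\phi)$ (the sign flips with the ordering of the variables, and I take the absolute value anyway):

import numpy as np

# Finite-difference Jacobian of (x, y) = (sin(theta)cos(phi), sin(theta)sin(phi))
# with respect to (theta, phi); its determinant should be sin(2*theta)/2.
def xy(theta, phi):
    return np.array([np.sin(theta) * np.cos(phi), np.sin(theta) * np.sin(phi)])

(theta, phi, h) = (0.7, 1.3, 1e-6)
J = np.column_stack([
    (xy(theta + h, phi) - xy(theta - h, phi)) / (2 * h),  # d(x, y)/d(theta)
    (xy(theta, phi + h) - xy(theta, phi - h)) / (2 * h),  # d(x, y)/d(phi)
])
print(np.linalg.det(J), np.sin(2 * theta) / 2)  # both ~0.4927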

I'm wondering if the fact that the line element of the circle is just $d\theta$ while the area element of the sphere is $\sin(\theta)\,d\theta\,d\phi$ means I have to take different steps (see the quick check after this paragraph).
Anyway, intuitively I understand that I need to "stretch" the density when going from the ball to the hemisphere, but I don't know how to accomplish that.
Googling around, it seems like there should be a straightforward answer using some kind of change of variables, but this is math I'm not familiar with, so I'm struggling to come up with the right approach.
Any help is appreciated!
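
As a quick check on that area element, integrating $\sin(\theta)\,d\theta\,d\phi$ over the hemisphere (with the usual convention $\theta \in [0, \pi/2]$) does recover the surface area $2\pi$:

import numpy as np
from scipy.integrate import dblquad

# Integrate the sphere's area element sin(theta) dtheta dphi over
# theta in [0, pi/2] and phi in [0, 2*pi]; the result is 2*pi.
(area, _) = dblquad(lambda theta, phi: np.sin(theta), 0, 2 * np.pi, 0, np.pi / 2)
print(area, 2 * np.pi)  # ~6.2832 ~6.2832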

Here's the Python code to generate all the plots:

import matplotlib.pyplot as plt
import numpy as np

plt.rcParams.update({"font.size": 20})


def one_dimension():
    xs = np.linspace(-1, 1, 500)
    ys = np.sqrt(1 - xs**2)
    skip = 50
    samp_xs = xs[skip::skip]
    samp_ys = ys[skip::skip]
    plt.plot(xs, ys)
    plt.hlines(0, -1, 1)
    plt.vlines(samp_xs, 0, samp_ys)
    plt.plot(samp_xs, samp_ys, "o", label="$S^{1}$ Manifold Sample")
    plt.plot(samp_xs, np.zeros_like(samp_xs), "o", label=r"$\mathbb{R}^{1}$ Sample")
    plt.axis("equal")
    plt.axis("off")
    plt.legend(loc="upper right")
    plt.show()

    N = 1000000
    thetas = np.random.uniform(0, np.pi, size=N)
    (fig, axs) = plt.subplots(2, 2)
    axs[0, 0].hist(thetas)
    axs[0, 0].set_title(r"Samples of $\theta$")
    axs[0, 0].set_xlabel(r"$\theta$")
    axs[0, 0].set_ylabel("Count")

    xs = np.cos(thetas)
    axs[0, 1].hist(xs)
    axs[0, 1].set_title(r"Samples of $x = \cos(\theta)$")
    axs[0, 1].set_xlabel(r"$x$")
    axs[0, 1].set_ylabel("Count")

    n_bins = 500
    bins = np.linspace(-1, 1, n_bins + 1)[1:]
    labels = np.digitize(xs, bins, right=True)
    (unique_labels, counts) = np.unique(labels, return_counts=True)
    ps = counts / counts.sum()
    entropy = -np.sum(ps * np.log(ps))
    print(entropy)

    # See: https://nathancarter.github.io/how2data/site/how-to-plot-discrete-probability-distributions-in-python-using-scipy/.
    axs[1, 1].plot(unique_labels, ps, "o")
    axs[1, 1].vlines(unique_labels, 0, ps)
    axs[1, 1].set_ylim(bottom=0)
    axs[1, 1].set_title("Bin Probabilities for $x$")
    axs[1, 1].set_xlabel("$x$")
    axs[1, 1].set_ylabel("$p$")

    new_thetas = np.linspace(0, np.pi, 100)
    new_xs = np.cos(new_thetas)
    new_bins = np.digitize(new_xs, bins, right=True)
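    # Convert bin probabilities to a density estimate (divide by the bin width),
    # then apply the 1-D change of variables via the |-sin(theta)| factor.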
    bin_width = 2 / n_bins
    new_ps = ps[new_bins] / bin_width
    theta_ps = new_ps * np.abs(-1 * np.sin(new_thetas))

    axs[1, 0].plot(new_thetas, theta_ps, "o")
    axs[1, 0].vlines(new_thetas, 0, theta_ps)
    axs[1, 0].hlines(1 / np.pi, 0, np.pi, color="red", label=r"$\frac{1}{\pi}$")
    axs[1, 0].set_ylim(bottom=0)
    axs[1, 0].set_title(
        r"$p_{\theta}(\theta) = p_{\mathrm{Bin}(x)}(\cos(\theta)) \frac{1}{w} |-\sin(\theta)|$"
    )
    axs[1, 0].set_xlabel(r"$\theta$")
    axs[1, 0].set_ylabel(r"$p_{\theta}(\theta)$")
    axs[1, 0].legend(loc="upper center")
    plt.show()


def two_dimensions():
    N = 1000000
    # See: https://stats.stackexchange.com/questions/7977/how-to-generate-uniformly-distributed-points-on-the-surface-of-the-3-d-unit-sphe.
    points = np.random.normal(size=(N, 3))
    points /= np.linalg.norm(points, axis=1, keepdims=True)
    points = points[points[:, 2] > 0]

    samps = 1000
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    ax.scatter(*np.hsplit(points[:samps], 3))
    # See: https://stackoverflow.com/a/72928548/1316276.
    limits = np.array([getattr(ax, f"get_{axis}lim")() for axis in "xyz"])
    ax.set_box_aspect(np.ptp(limits, axis=1))
    plt.show()

    n_bins = 25
    bins = np.linspace(-1, 1, n_bins + 1)[1:]
    xy_labels = np.digitize(points[:, :2], bins, right=True)
    (unique_labels, counts) = np.unique(xy_labels, return_counts=True, axis=0)
    all_counts = np.zeros((n_bins, n_bins))
    for ((col, row), count) in zip(unique_labels, counts):
        all_counts[row, col] = count

    plt.imshow(all_counts)
    plt.show()

    ps = all_counts / all_counts.sum()
    entropy = -np.sum(ps[ps > 0] * np.log(ps[ps > 0]))
    print(entropy)

    phis = np.linspace(0, 2 * np.pi, 100)
    thetas = np.linspace(np.pi / 2, np.pi, 100)
    xs = np.cos(thetas) * np.sin(phis)
    ys = np.sin(thetas) * np.sin(phis)
    new_xys = np.stack([xs, ys]).T

    new_bins = np.digitize(new_xys, bins, right=True)
    bin_width = 2 / n_bins
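    # Convert 2-D bin probabilities to a density estimate (divide by the bin
    # area), then scale by the absolute Jacobian determinant.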
    new_ps = ps[new_bins[:, 0], new_bins[:, 1]] / bin_width**2
    cov_factor = np.abs(np.sin(2 * thetas) / 2)
    phi_theta_ps = new_ps * cov_factor
    plt.plot(np.arange(len(phi_theta_ps)), phi_theta_ps)
    plt.hlines(
        1 / (2 * np.pi), 0, len(phi_theta_ps), color="red", label=r"$\frac{1}{2\pi}$"
    )
    plt.legend()
    plt.show()


def main():
    one_dimension()
    two_dimensions()


if __name__ == "__main__":
    main()

Best Answer

MCMC might be useful here, depending on the specifics. You can generate a chain of, say, 10k samples and then restrict the samples to the region of interest; the normalization factor is then the corresponding fraction of the total samples. I can vouch for the emcee Python package. At the very least, you can cross-check your analytic estimates against the Monte Carlo results.
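
For example, here's a minimal emcee sketch targeting the arcsine density from the 1-D part of the question (the target density and all tuning values are just illustrative choices):

import numpy as np
import emcee

# Log-density (up to a constant) of the arcsine distribution on (-1, 1),
# i.e. the distribution of x = cos(theta) for uniform theta.
def log_prob(p):
    x = p[0]
    if not -1.0 < x < 1.0:
        return -np.inf
    return -0.5 * np.log1p(-x * x)  # log of 1/sqrt(1 - x^2)

(nwalkers, ndim) = (32, 1)
p0 = np.random.uniform(-0.5, 0.5, size=(nwalkers, ndim))
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)
sampler.run_mcmc(p0, 5000)
xs = sampler.get_chain(discard=1000, flat=True)[:, 0]

# The histogram of xs should match the analytic 1/(pi*sqrt(1 - x^2)) shape,
# with heavy mass near the endpoints.
print(np.mean(np.abs(xs) > 0.9))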
