Probability – Is the Ratio of Medians a Good Estimate of the Median of Ratios?

distributionsmeanmedianprobability

For two random variables $X$ and $Y$, does it hold that the median of $\frac{X}{Y}$ is approximately equal to the median of $X$ divided by the median of $Y$:

$$
F_{X/Y}^{-1}(0.5) \approx \frac{F_{X}^{-1}(0.5)}{F_{Y}^{-1}(0.5)}?
$$

Could you please provide a derivation?

For means this does not hold in general. I did a short experiment where I took random samples from two distributions and computed the mean/median of the ratios and the ratio of means/medians. I repeated this several times. The ratios of means were sometimes way off, but the ratios of medians were always close to the median of ratios. So I'm wondering if there is a theoretical justification for that or if my experiments are just not extensive enough (I tested with 16 different distribution pairs).

I'm attaching experiment code if it helps:

library(tidyverse)

two_normals <- function (mu1, mu2, sigma1 = 1, sigma2 = 2) {
  return (function (nsamp) {
    x <- rnorm(nsamp, mu1, sigma1)
    y <- rnorm(nsamp, mu2, sigma2)
    
    return (list(x = x, y = y))
  })
}

two_truncated_normals <- function (mu1, mu2, sigma1 = 1, sigma2 = 2) {
  return (function (nsamp) {
    x <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu1, sigma1)
    y <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu2, sigma2)
    
    return (list(x = x, y = y))
  })
}

two_truncated_normals_correlated <- function (mu1, mu2, sigma1, 
                                      sigma2 = 2, linear_term = 1) {
  return (function (nsamp) {
    x <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu1, sigma1)
    y <- x + linear_term * rnorm(nsamp, 0, sigma2)
    
    return (list(x = x, y = y))
  })
}

two_truncated_normals_correlated_negative <- function (mu1, mu2, 
            sigma1, sigma2 = 2, linear_term = 1) {
  return (function (nsamp) {
    x <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu1, sigma1)
    y <- -x + linear_term * rnorm(nsamp, 0, sigma2)
    y <- y - min(y) + 1
    
    return (list(x = x, y = y))
  })
}

first_normal_second_gamma <- function (mu1, sigma1 = 1, shape, scale) {
  return (function (nsamp) {
    x <- rnorm(nsamp, mu1, sigma1)
    y <- rgamma(nsamp, shape, scale = scale)
    
    return (list(x = x, y = y))
  })
}

first_gamma_second_normal <- function (shape, scale, mu1, sigma1 = 1) {
  return (function (nsamp) {
    y <- rnorm(nsamp, mu1, sigma1)
    x <- rgamma(nsamp, shape, scale = scale)
    
    return (list(x = x, y = y))
  })
}

experiments <- c(
  "two_normals" = two_normals(20, 20),
  "two_truncated_normals_1" = two_truncated_normals(20, 20),
  "two_truncated_normals_2" = two_truncated_normals(20, 20, 10000),
  "two_truncated_normals_3" = two_truncated_normals(20, 20, 1, 10000),
  "two_truncated_normals_4" = two_truncated_normals(20, 20, 1000, 1),
  "two_truncated_normals_5" = two_truncated_normals(20, 20, 1000, 
                                                    1000),
  "two_truncated_normals_6" = two_truncated_normals(2, 2, 1, 1),
  "two_truncated_normals_7" = two_truncated_normals(2, 10, 1, 1),
  "two_truncated_normals_correlated_1" = 
    two_truncated_normals_correlated(20, 20, 1, 1, 1),
  "two_truncated_normals_correlated_2" = 
    two_truncated_normals_correlated(20, 20, 1, 4, 1),
  "two_truncated_normals_correlated_3" = 
   two_truncated_normals_correlated_negative(20, 20, 1, 1, 1),
  "two_truncated_normals_correlated_4" = 
   two_truncated_normals_correlated_negative(20, 20, 1, 4, 1),
  "first_normal_second_gamma_1" = first_normal_second_gamma(20, 1, 1, 
                                                            20),
  "first_normal_second_gamma_2" = first_normal_second_gamma(20, 1, 20, 
                                                            200),
  "first_gamma_second_normal_1" = first_gamma_second_normal(1, 20, 20, 
                                                            1),
  "first_gamma_second_normal_2" = first_gamma_second_normal(20, 200, 
                                                            20, 1)
)

n       <- 100
nsamp   <- 1000
out_df  <- tibble()

set.seed(1)

for (i in 1:length(experiments)) {
  mean_of_ratios   <- vector(mode = "numeric", length = n)
  ratio_of_means   <- vector(mode = "numeric", length = n)
  median_of_ratios <- vector(mode = "numeric", length = n)
  ratio_of_medians <- vector(mode = "numeric", length = n)
  
  for (j in 1:n) {
    samples <- experiments[[i]](nsamp)
    x       <- samples[[1]]
    y       <- samples[[2]]

    mean_of_ratios[j]   <- mean(x / y)
    ratio_of_means[j]   <- mean(x) / mean(y)
    median_of_ratios[j] <- median(x / y)
    ratio_of_medians[j] <- median(x) / median(y)
  }
  
  df <- tibble(
    "experiment"       = names(experiments)[i],
    "mean_of_ratios"   = mean_of_ratios,
    "ratio_of_means"   = ratio_of_means,
    "median_of_ratios" = median_of_ratios,
    "ratio_of_medians" = ratio_of_medians,
  )
  
  out_df <- out_df %>% bind_rows(df)
}

ggplot(out_df, aes(x = mean_of_ratios, y = ratio_of_means)) +
  geom_point() +
  facet_wrap(~ experiment, scales = "free") +
  geom_abline(aes(slope = 1, intercept = 0))

ggplot(out_df, aes(x = median_of_ratios, y = ratio_of_medians)) +
  geom_point() +
  facet_wrap(~ experiment, scales = "free") +
  geom_abline(aes(slope = 1, intercept = 0))

Images:
Ratio of means
Ratio of medians

Best Answer

Counterexample: Consider $X$ distributed $0.4\delta_{0.1}+0.6\delta_{100}$, where $\delta_z$ denotes the Dirac measure in $z$, i.e., $X$ is 0.1 with probability 0.4 and 100 with probability 0.6. Distribution of $Y$ is $0.6\delta_{0.1}+0.4\delta_{100}$. Med(X)=100, Med(Y)=0.1, Med(X)/Med(Y)=1000. The distribution of $X/Y$ is $0.16*\delta_{0.001}+0.48\delta_1+0.36\delta_{1000}$, its median is 1.