Probability – Is the Ratio of Medians a Good Estimate of the Median of Ratios?

For two random variables $X$ and $Y$, does it hold that the median of $\frac{X}{Y}$ is approximately equal to the median of $X$ divided by the median of $Y$:

$$
F_{X/Y}^{-1}(0.5) \approx \frac{F_{X}^{-1}(0.5)}{F_{Y}^{-1}(0.5)}?
$$

Could you please provide a derivation?

For means this does not hold in general. I did a short experiment where I took random samples from two distributions and computed the mean/median of the ratios and the ratio of means/medians. I repeated this several times. The ratios of means were sometimes way off, but the ratios of medians were always close to the median of ratios. So I'm wondering if there is a theoretical justification for that or if my experiments are just not extensive enough (I tested with 16 different distribution pairs).

I'm attaching experiment code if it helps:

library(tidyverse)

two_normals <- function (mu1, mu2, sigma1 = 1, sigma2 = 2) {
  return (function (nsamp) {
    x <- rnorm(nsamp, mu1, sigma1)
    y <- rnorm(nsamp, mu2, sigma2)
    
    return (list(x = x, y = y))
  })
}

two_truncated_normals <- function (mu1, mu2, sigma1 = 1, sigma2 = 2) {
  return (function (nsamp) {
    x <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu1, sigma1)
    y <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu2, sigma2)
    
    return (list(x = x, y = y))
  })
}

two_truncated_normals_correlated <- function (mu1, mu2, sigma1, 
                                      sigma2 = 2, linear_term = 1) {
  return (function (nsamp) {
    x <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu1, sigma1)
    y <- x + linear_term * rnorm(nsamp, 0, sigma2)
    
    return (list(x = x, y = y))
  })
}

two_truncated_normals_correlated_negative <- function (mu1, mu2, 
            sigma1, sigma2 = 2, linear_term = 1) {
  return (function (nsamp) {
    x <- truncnorm::rtruncnorm(nsamp, 0, Inf, mu1, sigma1)
    y <- -x + linear_term * rnorm(nsamp, 0, sigma2)
    y <- y - min(y) + 1
    
    return (list(x = x, y = y))
  })
}

first_normal_second_gamma <- function (mu1, sigma1 = 1, shape, scale) {
  return (function (nsamp) {
    x <- rnorm(nsamp, mu1, sigma1)
    y <- rgamma(nsamp, shape, scale = scale)
    
    return (list(x = x, y = y))
  })
}

first_gamma_second_normal <- function (shape, scale, mu1, sigma1 = 1) {
  return (function (nsamp) {
    y <- rnorm(nsamp, mu1, sigma1)
    x <- rgamma(nsamp, shape, scale = scale)
    
    return (list(x = x, y = y))
  })
}

experiments <- c(
  "two_normals" = two_normals(20, 20),
  "two_truncated_normals_1" = two_truncated_normals(20, 20),
  "two_truncated_normals_2" = two_truncated_normals(20, 20, 10000),
  "two_truncated_normals_3" = two_truncated_normals(20, 20, 1, 10000),
  "two_truncated_normals_4" = two_truncated_normals(20, 20, 1000, 1),
  "two_truncated_normals_5" = two_truncated_normals(20, 20, 1000, 
                                                    1000),
  "two_truncated_normals_6" = two_truncated_normals(2, 2, 1, 1),
  "two_truncated_normals_7" = two_truncated_normals(2, 10, 1, 1),
  "two_truncated_normals_correlated_1" = 
    two_truncated_normals_correlated(20, 20, 1, 1, 1),
  "two_truncated_normals_correlated_2" = 
    two_truncated_normals_correlated(20, 20, 1, 4, 1),
  "two_truncated_normals_correlated_3" = 
   two_truncated_normals_correlated_negative(20, 20, 1, 1, 1),
  "two_truncated_normals_correlated_4" = 
   two_truncated_normals_correlated_negative(20, 20, 1, 4, 1),
  "first_normal_second_gamma_1" = first_normal_second_gamma(20, 1, 1, 
                                                            20),
  "first_normal_second_gamma_2" = first_normal_second_gamma(20, 1, 20, 
                                                            200),
  "first_gamma_second_normal_1" = first_gamma_second_normal(1, 20, 20, 
                                                            1),
  "first_gamma_second_normal_2" = first_gamma_second_normal(20, 200, 
                                                            20, 1)
)

n       <- 100
nsamp   <- 1000
out_df  <- tibble()

set.seed(1)

for (i in 1:length(experiments)) {
  mean_of_ratios   <- vector(mode = "numeric", length = n)
  ratio_of_means   <- vector(mode = "numeric", length = n)
  median_of_ratios <- vector(mode = "numeric", length = n)
  ratio_of_medians <- vector(mode = "numeric", length = n)
  
  for (j in 1:n) {
    samples <- experiments[[i]](nsamp)
    x       <- samples[[1]]
    y       <- samples[[2]]

    mean_of_ratios[j]   <- mean(x / y)
    ratio_of_means[j]   <- mean(x) / mean(y)
    median_of_ratios[j] <- median(x / y)
    ratio_of_medians[j] <- median(x) / median(y)
  }
  
  df <- tibble(
    "experiment"       = names(experiments)[i],
    "mean_of_ratios"   = mean_of_ratios,
    "ratio_of_means"   = ratio_of_means,
    "median_of_ratios" = median_of_ratios,
    "ratio_of_medians" = ratio_of_medians,
  )
  
  out_df <- out_df %>% bind_rows(df)
}

ggplot(out_df, aes(x = mean_of_ratios, y = ratio_of_means)) +
  geom_point() +
  facet_wrap(~ experiment, scales = "free") +
  geom_abline(aes(slope = 1, intercept = 0))

ggplot(out_df, aes(x = median_of_ratios, y = ratio_of_medians)) +
  geom_point() +
  facet_wrap(~ experiment, scales = "free") +
  geom_abline(aes(slope = 1, intercept = 0))

Images:

Best Answer

Counterexample: Consider $X$ distributed $0.4\delta_{0.1}+0.6\delta_{100}$, where $\delta_z$ denotes the Dirac measure in $z$, i.e., $X$ is 0.1 with probability 0.4 and 100 with probability 0.6. Distribution of $Y$ is $0.6\delta_{0.1}+0.4\delta_{100}$. Med(X)=100, Med(Y)=0.1, Med(X)/Med(Y)=1000. The distribution of $X/Y$ is $0.16*\delta_{0.001}+0.48\delta_1+0.36\delta_{1000}$, its median is 1.

Best Answer

Related Solutions

Median Calculation – How to Calculate Median of Medians

Solved – K-medians, formula to compute the median

Related Question