Google Earth Engine – Understanding Otsu Method Code in GEE

arraygoogle-earth-enginehistogramotsu

I hope this question is OK, I know it's not usual kind of questions that are being posted here.

I am trying to apply Otsu method on image in GEE. For that I have used the code that can be found here :
https://medium.com/google-earth/otsus-method-for-image-segmentation-f5c48f405e

The code is good and I believe it works, but my problem is that I don't fully understand the code and I would like to understand it better.

I have understood how the method works by reading this tutorial:
http://www.labbookpages.co.uk/software/imgProc/otsuThreshold.html,
but still' the steps are not clear to me.

I put here the code with my questions regard what different steps do when each row that I don't understand marked with // above the row.

///////////////////////Otsu

// Compute the histogram of the Difference one band image.  The mean and variance are only FYI.
var histogram = difference.select('VH').reduceRegion({
  reducer: ee.Reducer.histogram(255, 2)
      .combine('mean', null, true)
      .combine('variance', null, true), 
  geometry: geometry, 
  scale: 30,
  bestEffort: true
});
print(histogram);

// Chart the histogram
print(Chart.image.histogram(difference.select('VH'), geometry, 30));

///////////Otsu
  // Return the DN that maximizes interclass variance in VH (in the region).
var otsu = function(histogram) {
  //does count here take the histogram and convert it to array?
  var counts = ee.Array(ee.Dictionary(histogram).get('histogram'));
  //what is bucketmeans? is it the mean of each bar in the histogram?
  var means = ee.Array(ee.Dictionary(histogram).get('bucketMeans'));
  //I know in array size will show the dimensions. here we want to number of rows in the array from hostogram?
  var size = means.length().get([0]);
  //this step: not sure what happened here. we sum all the rows? what is get[0] in the end?
  var total = counts.reduce(ee.Reducer.sum(), [0]).get([0]);
  var sum = means.multiply(counts).reduce(ee.Reducer.sum(), [0]).get([0]);
  var mean = sum.divide(total);
  //and also not sure what indices is
  var indices = ee.List.sequence(1, size);

  // Compute between sum of squares, where each mean partitions the data.
  var bss = indices.map(function(i) {
    //hard to understand what is i because I don't know what indices is 
    var aCounts = counts.slice(0, 0, i);
    var aCount = aCounts.reduce(ee.Reducer.sum(), [0]).get([0]);
    var aMeans = means.slice(0, 0, i);
    var aMean = aMeans.multiply(aCounts)
        .reduce(ee.Reducer.sum(), [0]).get([0])
        .divide(aCount);
    var bCount = total.subtract(aCount);
    var bMean = sum.subtract(aCount.multiply(aMean)).divide(bCount);
    return aCount.multiply(aMean.subtract(mean).pow(2)).add(
           bCount.multiply(bMean.subtract(mean).pow(2)));
  });
    print(ui.Chart.array.values(ee.Array(bss), 0, means));

  // Return the mean value corresponding to the maximum BSS.
  return means.sort(bss).get([-1]);
};

var threshold = otsu(histogram.get('VH_histogram'));
//var threshold=25;
print('threshold', threshold);

My end goal: To understand the Otsu method code, what happens in each step- specifically the ones I asked inside the code.

Best Answer

I've tried to explain the steps a bit more. Hopefully this helps.

function otsu(histogram) {
  // Array of the pixel count in each bucket
  var counts = ee.Array(ee.Dictionary(histogram).get('histogram')) 
  // Array of the mean value for each bucket
  var means = ee.Array(ee.Dictionary(histogram).get('bucketMeans'))
  // The number of buckets
  var size = means.length().get([0])
  // The total number of pixels
  var total = counts.reduce(ee.Reducer.sum(), [0]).get([0])
  // Sum of all mean values
  var sum = means.multiply(counts).reduce(ee.Reducer.sum(), [0]).get([0])
  // Mean value for the whole image.
  var mean = sum.divide(total)

  // A list from 1 to the numbeer of buckets
  var indices = ee.List.sequence(1, size)

  // Compute between-sum-of-squares (BSS) for different thresholds, one per bucket.
  // Later on, we'll pick the best of these
  var bss = indices.map(function(i) {    
    // Array of pixel count
    // When i = 1, aCounts = [counts[0]], when i = 2, aCounts = [counts[0], counts[1]] etc
    var aCounts = counts.slice(0, 0, i)
    // Pixel count for class A
    var aCount = aCounts.reduce(ee.Reducer.sum(), [0]).get([0])
    // Like aCounts, but with the means
    var aMeans = means.slice(0, 0, i)
    // Mean for class A
    var aMean = aMeans.multiply(aCounts)
        .reduce(ee.Reducer.sum(), [0]).get([0])
        .divide(aCount)
    // Pixel count for class B
    var bCount = total.subtract(aCount)
    // Mean for class B
    var bMean = sum.subtract(aCount.multiply(aMean)).divide(bCount)
    // Calculate BSS for p = 2.
    return aCount.multiply(aMean.subtract(mean).pow(2)).add(
          bCount.multiply(bMean.subtract(mean).pow(2)))
  })

  print('BSS by mean', ui.Chart.array.values(ee.Array(bss), 0, means))

  // Return the mean value corresponding to the maximum BSS.
  return means
    // Sort the array of means based on their corresponding bucket's BSS
    .sort(bss)
     // Pick the last, i.e. the mean of the bucket that has the highest BSS
     // when used as a threshold
    .get([-1])
}
Related Question