Python – Iterating Through Raster Cells and Calculating Distance to Other Cells Using GDAL

gdalpythonraster

I have a binary raster (.tif) in ArcGIS that shows builtup areas (value=1) and non builtup areas (value=0) like this:

enter image description here

In order to make a proximity analysis, I need to iterate through the raster with arcpy/python and for each cell (i) get the distance to EVERY other cell (j) with value=1 within in a given radius around the current cell (i) and write all those distance values to a list/array.

Is there a performant way to do this using Python?

I know there is the EucDistance tool in ArcGIS:

enter image description here

but it may not be the smartest, to create several thousand new rasters.

My plan was to use a RasterToNumpyArray and then calculate on the numpy array

So far I tried the following Python code:

import numpy
from scipy.spatial import distance

def dist_euclid(cell_a, cell_b):
    dist = distance.euclidean(cell_a, cell_b)
    return dist

'''Masking is important to speed up processes'''
def cmask(coords, radius, array):
    a, b = coords[0], coords[1]
    nx, ny = array.shape
    y, x = numpy.ogrid[-a:nx - a, -b:ny - b]
    # create mask where each value in radius around coords is true and everything else is false
    mask = x * x + y * y <= radius * radius

    # reshape mask to rectangular shape
    i, j = numpy.where(mask)
    indices = numpy.meshgrid(numpy.arange(min(i), max(i) + 1),
                             numpy.arange(min(j), max(j) + 1),
                             indexing='ij')
    masked_array = array[tuple(indices)]
    return masked_array

def calcCellDistancesForEachCellInRaster(in_array, buffer_size):
    '''For each cell make slice sub-array around the cell and calculate distances to every other cell (value=1)'''
    dist_arr = []
    for (source_x, source_y), source_value in numpy.ndenumerate(in_array):
        if source_value == 1:
            masked_array = cmask([source_x, source_y], buffer_size, in_array)
            for (target_x, target_y), target_value in numpy.ndenumerate(masked_array):
                if target_value == 1 and ([source_x, source_y] != [target_x, target_y]):
                    dist = dist_euclid([source_x, source_y], [target_x, target_y])
                    if dist <= buffer_size:
                        dist_arr.append(dist)
                # print("dist from %s to %s = %s" % ([source_x, source_y], [target_x, target_y], dist))
                else:
                    pass
        else:
            pass
    return dist_arr

# Create a simple array from scratch using random values
myArray = numpy.random.randint(2, size=(100, 100))
myArray.shape = (100, 100)

dist_arr = calcCellDistancesForEachCellInRaster(myArray, 2)

Best Answer

I think this script does what you want, without iterating over each pixel in myArray:

import numpy as np
import scipy.ndimage as nd
import matplotlib.pyplot as plt

# Create a function that generates a circle (inside a square-shaped array) with
# different index values for each unique pixel and the corresponding distance to
# the center.
def make_circles(index):
    array_shape = (radius*2+1, radius*2+1)

    circle_index = np.zeros(array_shape) * np.nan
    circle_distance = np.zeros(array_shape) * np.nan

    a,b = (radius, radius)
    nx,ny = circle_index.shape
    y,x = np.ogrid[-a:nx-a,-b:ny-b]

    mask = x*x + y*y <= radius*radius
    mask[a,b] = False

    pixels_in_mask = np.sum(mask)

    circle_index[mask] = np.arange(0, pixels_in_mask, 1)
    circle_distance[mask] = np.sqrt(x*x + y*y)[mask]

    return circle_index, circle_distance

# Generate some dummy data
x = 100
y = 100
my_array = np.random.randint(2, size=(x, y))

# specify the radius in which to search for pixels in my_array with value = 1.
radius = 10

# Create circles.
circle_index, circle_distance = make_circles(radius)

plt.figure(1)
plt.title("Each pixel has a unique index number")
plt.imshow(circle_index)
plt.colorbar()

# Determine the maximum amount of pixels for which the distance 
# needs to be calculated per pixel.
distances_per_pixel = np.int(np.nanmax(circle_index) + 1)

# Create an array in which the results will be stored.
distances = np.zeros((distances_per_pixel, x, y))

# Determine the distances from every pixel to the current relative pixel as 
# defined by circle_index.
for position in circle_index[np.isfinite(circle_index)]:

    distance = circle_distance[circle_index == position][0]

    mask = np.fliplr(np.flipud(np.where(circle_index != position, 0, 1)))

    distance_map = nd.filters.convolve(my_array, mask, mode = 'constant', cval = 0.0) * distance

    distance_map[distance_map == 0.] = np.nan

    distances[np.int(position), :, :] = distance_map

# Each layer in the distances array corresponds to the "unique index number" as shown
# in figure 1. e.g. distances[3,:,:] shows a map that gives the distance to the circle_pixel
# with number 3, IF that pixel has value 1 in my_array.

# Calculate some other things, for example the shortest distance within a pixels
# radius to a pixel with my_array = 1.
shortest_distances = np.nanmin(distances, axis = 0)
longest_distances = np.nanmax(distances, axis = 0)
mean_distances = np.nanmean(distances, axis = 0)

# Plot a map showing the shortest distances.
plt.figure(2)
plt.imshow(shortest_distances, vmin = 0, vmax = radius)
plt.colorbar()

Be carefull though, as you increase the radius of you search area around each pixel, the size of the "distances" array will grow fast. Requiring more and more RAM-memory.

Related Question