[GIS] Convert Raster to Numpy Array with only Arcpy and Numpy

#memory#errorarcgis-10.3arcpynumpyraster

All the articles I'm finding including GDAL or PIL(?) which I cannot use. So I am trying to convert a raster I have to a Numpy Array with Arcpy and Numpy and then calculate some statistics on it but running into an error. Not much documentation on this and not sure what I am doing wrong. I am running ArcGIS 10.3.1 with Python 2.7.8 on a Windows 7 machine with 16gb ram.

Here is my code:

import arcpy
from arcpy.sa import *
from arcpy import env
import numpy as np
import sys, os, string, glob

#Set Environment
arcpy.CheckOutExtension("spatial")
arcpy.env.overwriteOutput = True
env.extent = "MINOF"
Coordsystem = "GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]"

#Set Input Data
tcorr = arcpy.Raster(r"G:/P38R37/test4/TcorrCon/Tcorr198.tif")
lowerLeft = arcpy.Point(tcorr.extent.XMin,tcorr.extent.YMin)
cellSize = tcorr.meanCellWidth

#Convert Raster to Numpy Array
#Raster is 7951 columns x 6971 rows, Floating point 32 bit, 211MB in size
tcorr_arr = arcpy.RasterToNumPyArray(tcorr,lowerLeft,7951,6971,nodata_to_value=0)

#Calculate the mean and standard deviation of the array
arr_mean = np.mean(tcorr_arr)
arr_std = np.std(tcorr_arr)

#Take the mean and std and do math
double_std = arr_std * 2
cfactor = mean - double_std

print cfactor

The error I am getting says:

Traceback (most recent call last):
File "C:\Python27\ArcGIS10.3\Lib\site-packages\pythonwin\pywin\framework\scriptutils.py", line 326, in RunScript
exec codeObject in __main__.__dict__
File "C:\Users\mschauer\Documents\Py Scripts\scratch\numpytest_tcorr.py", line 20, in <module>
tcorr_arr = arcpy.RasterToNumPyArray(tcorr,lowerLeft,7951,6971,nodata_to_value=0)
File "C:\ArcGIS\Desktop10.3\ArcPy\arcpy\__init__.py", line 2244, in RasterToNumPyArray
return _RasterToNumPyArray(*args, **kwargs)

MemoryError

Any ideas as to what I am doing wrong?

Best Answer

One solution to avoid your 32-bit memory woes is to read the raster in chunks on to a memory-mapped array, then do your calculations. Something like this:

# Make a dict of data type references
dtyperef = {'U1': 'bool',
            'U2': 'int8',
            'U4': 'int8',
            'U8': 'uint8',
            'S8': 'int8',
            'U16': 'uint16',
            'S16': 'int16',
            'U32': 'uint32',
            'S32': 'int32',
            'F32': 'float32',
            'F64': 'float64'}
# Get some info
shape = (tcorr.height, tcorr.width)
dtype = dtyperef[tcorr.pixelType]
nodata = tcorr.noDataValue
cellsize_x = tcorr.meanCellWidth
cellsize_y = tcorr.meanCellHeight
topLeft = (tcorr.extent.XMin, tcorr.extent.YMax)

# Create output array using temporary file
import tempfile
with tempfile.TemporaryFile() as tf:
    tcorr_arr = np.memmap(tf, mode='w+', shape=shape, dtype=dtype)

    # Populate array using chunks
    ychunks = range(0, shape[0], 128)
    xchunks = range(0, shape[1], 128)
    for ych in zip(ychunks[:-1], ychunks[1:-1] + [shape[0]]):
        for xch in zip(xchunks[:-1], xchunks[1:-1] + [shape[1]]):
            xblock, yblock = xch[1] - xch[0], ych[1] - ych[0]
            blc = arcpy.Point(topleft[0] + (xch[0] * cellsize_x),
                              topLeft[1] - (ych[1] * cellsize_y))
            tcorr_arr[ych[0]:ych[1], xch[0]:xch[1]] = arcpy.RasterToNumPyArray(tcorr, blc, xblock, yblock)

    # Mask the no data value
    tcorr_arr = np.ma.masked_equal(tcorr_arr, nodata)

    # Do some mathy stuff
    cfactor = tcorr_arr.mean() - (tcorr_arr.std() * 2)

print cfactor
Related Question