[GIS] Cannot convert float Nan to integer with raster data

gdalosgeopython

I have a script that basically takes a low resolution raster grid and creates elevation band text file for this raster based on a high resolution DEM. The data can be accessed from here.
On Conda prompt I wrote:

python format_snow_params.py ~/VIC_GRID_Rstr1.tif ~/DEM.tif ~/snow_params.txt 492

Where VIC_GRID_Rstr1.tif is the lower resolution grid raster (basinMask), DEM is a high resolution raster(elvHiRes), snow_params.txt (outsnow) is the output file to generated and lastly 492 (interval) is the elevation interval in meters.

While running the script I get the following error:

File "format_snow_params.py", line 218, in <module>
    main()
  File "format_snow_params.py", line 212, in main
    format_snow_params(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])
  File "format_snow_params.py", line 70, in format_snow_params
    elvhires[np.where(elvhires<0)] = np.nan
ValueError: cannot convert float NaN to integer

The whole script (obtained from github) is as follows:

#******************************************************************************
# FILE: format_snow_params.py
# AUTHOR: Kel Markert
# EMAIL: kel.markert@nasa.gov
# ORGANIZATION: NASA-SERVIR, UAH/ESSC
# MODIFIED BY: n/a
# CREATION DATE: 28 Oct. 2016
# LAST MOD DATE: 03 Apr. 2017
# PURPOSEe This script takes geotiff data for snow band parameterization and converts
#          it to the needed text format for the VIC model
# DEPENDENCIES: numpy, osgeo (gdal)
#******************************************************************************

# import dependencies
from __future__ import print_function
import os
import sys
import warnings
import numpy as np
from osgeo import gdal
from osgeo.gdalnumeric import *
from osgeo.gdalconst import *

# set system to ignore simple warnings
warnings.simplefilter("ignore")

def format_snow_params(basinMask,elvHiRes,outSnow,interval):
    """
    FUNCTION: format_snow_params
    ARGUMENTS: basinMask - path to template raster to run VIC model at
               elvHiRes - path elevation raster dataset at native resolution
               outsnow - path output snow parameter file
               interval - vertical distance to do equal interval segmentation
    KEYWORDS: n/a
    RETURNS: n/a
    NOTES: Does not return a variable but writes an output file
    """

    band = 1 # constant variable for reading in data

    interval = int(interval) # force equal interval value to be int type

    # make a list of input raster files
    infiles = [basinMask,elvHiRes]

    # try to read in the raster data
    try:

        # read basin grid raster
        ds = gdal.Open(infiles[0],GA_ReadOnly)
        b1 = ds.GetRasterBand(band)
        mask = BandReadAsArray(b1)
        maskRes = ds.GetGeoTransform()[1]
        ds = None
        b1 = None

        # read hi res elevation raster
        ds = gdal.Open(infiles[1],GA_ReadOnly)
        b1 = ds.GetRasterBand(band)
        elvhires = BandReadAsArray(b1)
        clsRes = ds.GetGeoTransform()[1]
        ds = None
        b1 = None

    # if not working, give error message
    except AttributeError:
        raise IOError('Raster file input error, check that all paths are correct')

    # mask elevation values less than 0
    elvhires[np.where(elvhires<0)] = np.nan

    # get ratio of high resoltion to low resolution
    clsRatio = int(maskRes/clsRes)

    # check if the output parameter file exists, if so delete it
    if os.path.exists(outSnow)==True:
        os.remove(outSnow)

    nbands = [] # blank list

    # try to write to output snow parameter file
    try:

        with open(outSnow, 'w') as f:

            cnt = 1 # set grid cell id counter

            # pass counter
            pass_counter = range(2)

            # perform two passes on the raster data
            # 1) to grab the maximum number of bands for a given pixel
            # 2) to calculate the snow band parameters and write to output file
            for pass_cnt in pass_counter:

                # loop over each pixel in the template raster
                for i in range(mask.shape[0]):
                    cy1 = i*clsRatio
                    cy2 = cy1+clsRatio

                    for j in range(mask.shape[1]):
                        cx1 = j*clsRatio
                        cx2 = cx1+clsRatio

                        # get all hi res pixels in a template pixel
                        tmp = elvhires[cy1:cy2,cx1:cx2]
                        if tmp.size == 0:
                tmp=tmp2[:]
                    
                        
                        # create blank array for number of bands calculation...
                        if mask[i,j] == 1: # ...if it is not a masked pixel
                tmp2=tmp

                if np.all(tmp == np.nan) == True:
                tmp[:,:] = 0
                
                            # find min and max values for interval
                            minelv = np.nanmin(tmp.astype(int)) - (np.nanmin(tmp.astype(int))%interval)
                        maxelv = np.nanmax(tmp.astype(int)) + (np.nanmax(tmp.astype(int))%interval)
                        
                            #print(np.min(tmp).mask)
                            # create an array of band limits
                        bands = np.arange(minelv, maxelv+interval,interval)

                        bcls = np.zeros_like(tmp)
                        bcls[:,:] = -1

                            # get the number of bands per pixel
                    for b in range(bands.size-1):

                                bcls[np.where((tmp>=bands[b])&(tmp<bands[b+1]))] = b # band counter

                                # if it's the first pass get number of bands for each pixel
                                if pass_cnt == 0:
                                    uniqcnt = np.unique(bcls[np.where(tmp>0)])
                            nbands.append(uniqcnt.size) # save to a list for second pass

                    if pass_cnt == 1:
                                uniqcnt = np.unique(bcls[np.where(tmp>0)])
                                #clscnt = np.bincount(tmp.ravel())

                                f.write('{0}\t'.format(cnt)) # write grid cell id

                                # find frational area for each band and write to file
                                for c in range(maxbands):
                                    try:
                                        idx = np.where(bcls==uniqcnt[c])
                                        num = np.float(bcls[np.where(bcls>=0)].size)
                    if num == 0:
                        num = np.float(idx[0].size)
                                        frac = np.float(idx[0].size) / num
                                    except IndexError:
                                        frac = 0

                            f.write('{0:.4f}\t'.format(frac))

                                # calculate the mean elevation for each band and write to file
                                for c in range(maxbands):
                                    try:
                                        idx = np.where(bcls==uniqcnt[c])
                                        muelv = np.nanmean(tmp[idx])

                                    except IndexError:
                                        muelv = 0

                                    f.write('{0:.4f}\t'.format(muelv))

                                # calculate the precipitation fractions and write to file
                            for c in range(maxbands):
                            try:
                                        idx = np.where(bcls==uniqcnt[c])
                    num = np.float(bcls[np.where(bcls>=0)].size)
                    if num == 0:
                        num = np.float(idx[0].size)
                                        frac = np.float(idx[0].size) / num
                                    except IndexError:
                                        frac = 0

                            f.write('{0:.4f}\t'.format(frac))

                        f.write('\n') # write return value for new line

                        if pass_cnt == 1 & mask[i,j] == 1:
                            cnt += 1 # plus one to the grid cell id counter

                if pass_cnt == 0:
                    maxbands = max(nbands) # maximum number of bands for a pixel

        # print the number of bands for user to input into global parameter file
        print('Number of maximum bands: {0}'.format(maxbands))

    # except raise an error when it doesn't work
    except IOError:
        raise IOError('Cannot write output file, error with output snow parameter file path')

    return

def main():
    n_args = len(sys.argv)

    # Check user inputs
    if n_args != 5:
        print ("Wrong user input")
        print ("Script writes the snow band parameter file for the VIC model")
        print ("usage: python format_snow_params.py <template raster> <elevation raster> <output snow band file> <interval for snow bands>")
        print ("Exiting system...")
        sys.exit()

    else:
        # Pass command line arguments into function
        format_snow_params(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])

    return

# Execute the main level program if run as standalone
if __name__ == "__main__":
    main()

I did a try a solution mentioned here, by basically changing the line:

elvhires[np.where(elvhires<0)] = np.nan

elvhires=np.where(elvhires<0,np.NaN,elvhires)

But this then gives me another error:

File "format_snow_params.py", line 218, in <module>
    main()
  File "format_snow_params.py", line 212, in main
    format_snow_params(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])
  File "format_snow_params.py", line 188, in format_snow_params
    maxbands = max(nbands) # maximum number of bands for a pixel
ValueError: max() arg is an empty sequence

How can I resolve this issue?

Update 1

Based on the feedback, I exported (using ArcGIS) the DEM (DEM_0-n2) again in .tif format and set no data to 0, but the Float Nan error still remains. And if I remove the line

elvhires[np.where(elvhires<0)] = np.nan

then I get the error at line 188, which is:

ValueError: max() arg is an empty sequence

Update 2

Even when I view the DEM values in array form in Python, the actual values of the DEM start at column 910-1134. The rest of the columns are all zeros. I am attaching an screenshot of the array just for reference. The script can also be downloaded from the same link. I also tried the script both in Python 2.7 and 3.6, but the error still remains the same. Please let me know if there are data access issues.

Best Answer

It might be worth avoiding use of np.NaN altogether. NaN literally means "not a number", and it cannot be converted to an integer.

There are two ways of doing this, depending on the nature of the data, and what the negative numbers mean in that data (it is the negative values that the script is attempting to convert to np.Nan).

Option 1:

Delete that np.Nan line from the script altogether, and leave the negative numbers as they are, and allow them to be converted to their negative integer equivalents.

Option 2:

Choose a number that never appears in the raster (eg, 99999) and use that instead. Depending on the nature of the data (and what the negative numbers represent), perhaps even 0 might be an appropriate number.

Depending on how you intend to use the data afterwards, and if it is appropriate, you may then be able to configure the dataset to know that your chosen number represents "nodata" (in ArcGIS for example, you can select a "nodata" value for rasters).

Code

import gdal
import os

src = r'c:\bands'
dst = r'c:\cal_bands'

COEFF_LUT = {'b1': 10, 'b2': 20, 'b3': 30, 'b4': 40}
FORMAT = 'GTiff'


def band_num(f):
    """Given raster filename with band number info, returns band number"""
    filename = f.split('.')[0]
    band = filename.split('_')[-1]
    return band


def multiply_raster_by_coeff(file, band):
    """Given filename and band number of image, multiplies it by saves a copy
    in specified destination folder"""
    coeff = COEFF_LUT[band]
    dst_datatype = gdal.GDT_Float32
    driver = gdal.GetDriverByName(FORMAT)

    # Read file, get metadata
    band_ds = gdal.Open(os.path.join(src, file))
    band_DN = band_ds.GetRasterBand(1).ReadAsArray()
    geotransform = band_ds.GetGeoTransform()
    geoproj = band_ds.GetProjection()
    xsize = band_ds.RasterXSize
    ysize = band_ds.RasterYSize

    band_DN = band_DN.astype('f')

    # Multiply by band coefficient
    cal_band = band_DN * coeff

    # Write to disk
    new_name = 'cal_' + file
    output_path = os.path.join(dst, new_name)
    dst_ds = driver.Create(output_path, xsize, ysize, 1, dst_datatype)
    dst_ds.GetRasterBand(1).WriteArray(cal_band)
    dst_ds.SetGeoTransform(geotransform)
    dst_ds.SetProjection(geoproj)


if __name__ == '__main__':
    files = [f for f in os.listdir(path) if f.endswith('.tif')]
    image_bands = {f: band_num(f) for f in files}
    for file, band in image_bands:
        multiply_raster_by_coeff(file, band)

Conclusion

You can now handle an arbitrary number of bands just by updating the COEFF_LUT dictionary.

You could further break the multiply_raster_by_coeff function. Of course, performance-wise the two scripts are equivalent, but future you (and collaborators) will appreciate this if it needs changes.

Best Answer

Related Solutions

[GIS] UnicodeDecodeError: While installing gdal in a new Python Virtual Environment

[GIS] Efficient way to multiply specific raster with specific variable python

Code

Conclusion

Related Question