Python – Creating a TIFF File Using GDAL from NumPy Array and Specifying NoData Value

gdalnodatanumpypython

I have some tiff files I obtained from splitting a bigger tiff file using gdal translate. The border tiff files have some nodata values, which I can see in numpy as number 15. I want to manipulate these files as numpy arrays, and then write them back to tiff file. How can I write a numpy array to tiff file, while mantaining the number 15 as the nodata value, so they can be read propertly is GIS programs?

Best Answer

The two functions from the code snippet below, create_raster and numpy_array_to_raster should do the trick. In terms of maintaining the NoData value from the array in the output raster, that is set on the band(s) of a raster with the .SetNoDataValue() method which in this code snippet is used in the numpy_array_to_raster function. For more information about using gdal & numpy for raster processing I would highly recommend Chris Garrard's book Geoprocessing with Python and for a quick reference this gdal/ogr cookbook page is a great resource.

import os
from osgeo import gdal
from osgeo import osr
import numpy

# config
GDAL_DATA_TYPE = gdal.GDT_Int32 
GEOTIFF_DRIVER_NAME = r'GTiff'
NO_DATA = 15
SPATIAL_REFERENCE_SYSTEM_WKID = 4326

def create_raster(output_path,
                  columns,
                  rows,
                  nband = 1,
                  gdal_data_type = GDAL_DATA_TYPE,
                  driver = GEOTIFF_DRIVER_NAME):
    ''' returns gdal data source raster object

    '''
    # create driver
    driver = gdal.GetDriverByName(driver)

    output_raster = driver.Create(output_path,
                                  int(columns),
                                  int(rows),
                                  nband,
                                  eType = gdal_data_type)    
    return output_raster

def numpy_array_to_raster(output_path,
                          numpy_array,
                          upper_left_tuple,
                          cell_resolution,
                          nband = 1,
                          no_data = NO_DATA,
                          gdal_data_type = GDAL_DATA_TYPE,
                          spatial_reference_system_wkid = SPATIAL_REFERENCE_SYSTEM_WKID,
                          driver = GEOTIFF_DRIVER_NAME):
    ''' returns a gdal raster data source

    keyword arguments:

    output_path -- full path to the raster to be written to disk
    numpy_array -- numpy array containing data to write to raster
    upper_left_tuple -- the upper left point of the numpy array (should be a tuple structured as (x, y))
    cell_resolution -- the cell resolution of the output raster
    nband -- the band to write to in the output raster
    no_data -- value in numpy array that should be treated as no data
    gdal_data_type -- gdal data type of raster (see gdal documentation for list of values)
    spatial_reference_system_wkid -- well known id (wkid) of the spatial reference of the data
    driver -- string value of the gdal driver to use

    '''

    print 'UL: (%s, %s)' % (upper_left_tuple[0],
                            upper_left_tuple[1])

    rows, columns = numpy_array.shape
    print 'ROWS: %s\n COLUMNS: %s\n' % (rows,
                                        columns)

    # create output raster
    output_raster = create_raster(output_path,
                                  int(columns),
                                  int(rows),
                                  nband,
                                  gdal_data_type) 

    geotransform = (upper_left_tuple[0],
                    cell_resolution,
                    upper_left_tuple[1] + cell_resolution,
                    -1 *(cell_resolution),
                    0,
                    0)

    spatial_reference = osr.SpatialReference()
    spatial_reference.ImportFromEPSG(spatial_reference_system_wkid)
    output_raster.SetProjection(spatial_reference.ExportToWkt())
    output_raster.SetGeoTransform(geotransform)
    output_band = output_raster.GetRasterBand(1)
    output_band.SetNoDataValue(no_data)
    output_band.WriteArray(numpy_array)          
    output_band.FlushCache()
    output_band.ComputeStatistics(False)

    if os.path.exists(output_path) == False:
        raise Exception('Failed to create raster: %s' % output_path)

    return  output_raster