Reduce the size of the file while I write an array to GeoTIFF using Python

python

I want to change the label of a GeoTIFF array.

This is what I am doing

import numpy as np
from osgeo import gdal

    def changeLabel(arr):
        arr[np.where( arr == 103 )] = 10        # class 1
        arr[np.where( arr == 33 )]  = 20        # class 2
        arr[np.where( arr == 65 )]  = 30        # class 3
        arr[np.where( arr == 1 )]   = 40        # class 4
        arr[np.where( arr == 9 )]   = 50        # class 5
        arr[np.where( arr == 17 )]  = 60        # class 6
        arr[np.where( arr == 129 )] =  0        # water / no data
        return arr

    def distanceFile(inFile, outFile):
        driver = gdal.GetDriverByName('GTiff')
        file = gdal.Open(inFile)
        band = file.GetRasterBand(1)
        lista = band.ReadAsArray()
        lista = changeLabel(lista)
    
        # create new file
        file2 = driver.Create(outFile, file.RasterXSize , file.RasterYSize , 1)
        b = file2.GetRasterBand(1)
        b.SetNoDataValue(0)
        b.SetNoDataValue(1)
        b.WriteArray(lista)
    
        # spatial ref system
        proj = file.GetProjection()
        georef = file.GetGeoTransform()
        file2.SetProjection(proj)
        file2.SetGeoTransform(georef)
        file2 = None

    inFile='inFile.tif'
    outFile='outFile.tif'
    ## Run the function
    distanceFile(inFile,outFile)

However while inFile has a size of 150 MB, the outFile size is 6.7 GB. How can I reduce the size of the produced file?

Best Answer

You want to use a sensible data type (UInt8 should work for your data) and compress the output. You can do this with the Create statement, and use LZW (or DEFLATE) compression:

file2 = driver.Create(outFile, xsize=file.RasterXSize, 
                     ysize=file.RasterYSize, bands=1,
                     eType=gdal.GDT_Byte,
                     options=['COMPRESS=LZW']
                     )

See more details about compression options here.