[GIS] Converting raster pixels to polygons with GDAL python

gdalgdal-polygonizepythonqgisraster-conversion

I want to convert raster pixels to vector polygons, as in the image below, which is the output of "Processing Toolbox → Vector Creation → Raster pixels to polygons" in QGIS 3.8:

I tried to do this vectorization with the code below:

from osgeo import gdal, ogr, osr
import sys
import os


#gdal.UseExceptions()
os.chdir("H:/aa")
fileName = "H:/aa/image.tif"
src_ds = gdal.Open(fileName)
# With exceptions disabled gdal.Open returns None on failure, so the dataset
# must be validated before anything is read from it.
if src_ds is None:
    print('Unable to open %s' % fileName)
    sys.exit(1)

# Reuse the raster's projection for the output vector layer.
srs = osr.SpatialReference()
srs.ImportFromWkt(src_ds.GetProjection())

srcband = src_ds.GetRasterBand(1)
print (srcband)

# Create the output shapefile with a single 'DN' attribute.
dst_layername = "H:/aa/aapoly"
drv = ogr.GetDriverByName("ESRI Shapefile")
dst_ds = drv.CreateDataSource(dst_layername + ".shp")
dst_layer = dst_ds.CreateLayer(dst_layername , srs = srs, geom_type=ogr.wkbMultiPolygon)
newField = ogr.FieldDefn('DN', ogr.OFTReal)
dst_layer.CreateField(newField)

# Field index 0 is the 'DN' field created above; no mask band is used.
gdal.FPolygonize(srcband, None, dst_layer, 0, [], callback=None)

# Flush and close the output datasource.
dst_ds.SyncToDisk()
dst_ds=None

But output of this code is:

In the second image, neighbouring pixels with the same value were merged in the vector output, but I don't want them to be merged. The first image is the desired result; the second image, which is the output of the code, is not.
It is important to do this with GDAL code and not with other tools, such as those suggested in "Convert Raster to vector – create polygons based on each pixel".

With another function, "Polygonize", the result is even further from what I expected (based on @RafDouglas's suggestion in the answer section):

from osgeo import gdal, ogr, osr
import sys
import os

#gdal.UseExceptions()
os.chdir("H:/aa")
fileName = "H:/aa/image.tif"
src_ds = gdal.Open(fileName)
# With exceptions disabled gdal.Open returns None on failure, so the dataset
# must be validated before anything is read from it.
if src_ds is None:
    print('Unable to open %s' % fileName)
    sys.exit(1)

# Reuse the raster's projection for the output vector layer.
srs = osr.SpatialReference()
srs.ImportFromWkt(src_ds.GetProjection())

srcband = src_ds.GetRasterBand(1)
print (srcband)

# Create the output shapefile with a single 'DN' attribute.
dst_layername = "H:/aa/aapoly"
drv = ogr.GetDriverByName("ESRI Shapefile")
dst_ds = drv.CreateDataSource(dst_layername + ".shp")
dst_layer = dst_ds.CreateLayer(dst_layername , srs = srs, geom_type=ogr.wkbMultiPolygon)
newField = ogr.FieldDefn('DN', ogr.OFTReal)
dst_layer.CreateField(newField)

# Field index 0 is the 'DN' field created above; no mask band is used.
gdal.Polygonize(srcband, None, dst_layer, 0, [], callback=None)

# Flush and close the output datasource.
dst_ds.SyncToDisk()
dst_ds=None

Unfavorable Result:

Best Answer

This code will make a point vector layer with points in the center of all of your pixels and having an 'ID' field that contains a unique value for each. If you take the output layer and rasterize it using the bounding area and scale from the input (i.e. with gdal.Translate) you should have the raster you want.

import ogr, gdal, osr, os
import numpy as np
import itertools

def pixelOffset2coord(raster, xOffset,yOffset):
    """Convert a pixel/line offset into georeferenced coordinates.

    Applies all six terms of the raster's affine geotransform, so rasters
    with rotation/shear terms are handled as well as north-up ones (for which
    the two rotation terms are zero and the result is unchanged).

    Args:
        raster: object exposing ``GetGeoTransform()`` returning the six
            geotransform coefficients.
        xOffset: column offset in pixels (may be fractional).
        yOffset: row offset in pixels (may be fractional).

    Returns:
        Tuple ``(coordX, coordY)`` in the raster's coordinate system.
    """
    (originX, pixelWidth, rowRotation,
     originY, colRotation, pixelHeight) = raster.GetGeoTransform()
    coordX = originX + pixelWidth * xOffset + rowRotation * yOffset
    coordY = originY + colRotation * xOffset + pixelHeight * yOffset
    return coordX, coordY

def raster2array(rasterfn):
    """Open the raster at *rasterfn* and return band 1 read via ReadAsArray()."""
    # Keep the dataset bound to a local name while its band is being read.
    dataset = gdal.Open(rasterfn)
    return dataset.GetRasterBand(1).ReadAsArray()

def array2shp(array,outSHPfn,rasterfn):
    """Write one point feature per cell of *array* to an ESRI shapefile.

    Each point is placed at the centre of the corresponding pixel of the
    raster at *rasterfn* and carries an integer "ID" attribute equal to its
    feature id. An existing shapefile at *outSHPfn* is deleted first. The
    output layer takes the raster's spatial reference when it defines one.

    Args:
        array: 2-D array whose shape gives the number of rows and columns.
        outSHPfn: path of the shapefile to create.
        rasterfn: path of the raster that supplies the geotransform.

    Raises:
        RuntimeError: if the raster cannot be opened.
    """
    raster = gdal.Open(rasterfn)
    if raster is None:
        raise RuntimeError("Unable to open raster: {0}".format(rasterfn))

    # Carry the raster's projection over so the points line up with the
    # source when loaded or rasterized; leave it unset if the raster has none.
    projection = raster.GetProjection()
    srs = osr.SpatialReference(wkt=projection) if projection else None

    # wkbPoint
    shpDriver = ogr.GetDriverByName("ESRI Shapefile")
    if os.path.exists(outSHPfn):
        shpDriver.DeleteDataSource(outSHPfn)
    outDataSource = shpDriver.CreateDataSource(outSHPfn)
    outLayer = outDataSource.CreateLayer(outSHPfn, srs=srs, geom_type=ogr.wkbPoint)
    featureDefn = outLayer.GetLayerDefn()
    outLayer.CreateField(ogr.FieldDefn("ID", ogr.OFTInteger))

    # One reusable point geometry; its coordinates are overwritten per pixel.
    point = ogr.Geometry(ogr.wkbPoint)
    row_count, col_count = array.shape[:2]
    for ridx in range(row_count):
        if ridx % 100 == 0:
            print("{0} of {1} rows processed" .format(ridx, row_count))
        for cidx in range(col_count):
            # +0.5 moves from the pixel's upper-left corner to its centre.
            Xcoord, Ycoord = pixelOffset2coord(raster, cidx + 0.5, ridx + 0.5)
            point.AddPoint(Xcoord, Ycoord)
            outFeature = ogr.Feature(featureDefn)
            outFeature.SetGeometry(point)
            # The FID is only known after creation, so the feature is created
            # first and then rewritten with ID set to that FID.
            outLayer.CreateFeature(outFeature)
            outFeature.SetField("ID", outFeature.GetFID())
            outLayer.SetFeature(outFeature)
            outFeature.Destroy()
    outDataSource.Destroy()

def main(rasterfn,outSHPfn):
    """Read band 1 of *rasterfn* and pass it to array2shp to write *outSHPfn*."""
    array2shp(raster2array(rasterfn), outSHPfn, rasterfn)

if __name__ == "__main__":
    # Example input raster and output shapefile paths.
    rasterfn, outSHPfn = r'C:/image.tif', r'C:/points.shp'
    main(rasterfn, outSHPfn)

This will only work on small rasters due to the size constraints of ESRI's .shp file format.

Related Solutions

[GIS] Rescale Raster Band With GDAL Python Bindings

Not entirely sure there's a ready-made function/class in the GDAL Python bindings, but if you're brave, you can look at the source of gdal_translate and find out how it's done. From my quick skim, I think the scale ratio is set on VRTComplexSource (the dfScaleRatio property).

[GIS] Converting coordinates to pixels with out losing points

I don't think there is any problem with your data; it simply has values equal to zero in that area. The issues lie elsewhere. When I modified your code to produce both your points and your raster (using the gdal and PyQGIS Python modules), they did not match in the Italy area. Your geotransform parameters are wrong (I fixed them using 'Polygon from layer extent' on the point layer). The complete code is as follows:

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from osgeo import gdal, osr

# NOTE: QgsPoint, QgsVectorLayer, QgsFeature, QgsGeometry and
# QgsMapLayerRegistry are not imported here; the script is meant to be run
# from the QGIS Python console.
df = pd.read_csv('/home/zeito/Desktop/cords_value.csv')

# DataFrame.as_matrix() was removed in pandas 1.0; selecting the two columns
# and taking .values gives the same (n, 2) array on old and new pandas alike.
cords = df[['x', 'y']].values

points = [ QgsPoint(pt[0], pt[1]) for pt in cords ]

#gt = [7.6445503225, 5.4065168747250134e-06,  0.0,  45.07436634583334,  0.0, -5.406516856707135e-06]
gt = [7.65917, 5.4065168747250134e-06,  0.0,  45.0659,  0.0, -5.406516856707135e-06]
#xMin,yMin 7.65917,45.0656 : xMax,yMax 7.65972,45.0659

# Convert x/y coordinates to column/row indices using the geotransform.
index=np.zeros(cords.shape)

index[:,1]=((cords[:,1] - gt[3]) / gt[5]).round()
index[:,0]=((cords[:,0] - gt[0]) / gt[1]).round()

index=index.astype(int)

# Rebase the indices so that the smallest column/row index becomes 1.
# NOTE(review): the +1 leaves row 0 and column 0 empty, which may be the
# cause of the shift between points and image -- confirm.
index[:,0]=index[:,0]-min(index[:,0])+1
index[:,1]=index[:,1]-min(index[:,1])+1

row=max(index[:,1])
col=max(index[:,0])

image=np.zeros([row+1,col+1])

# Burn each point's value into its cell (column index first in `index`).
for i, (col_idx, row_idx) in enumerate(index):
    image[row_idx, col_idx] = df['value'][i]

# Create gtif file 
driver = gdal.GetDriverByName("GTiff")

output_file = "/home/zeito/pyqgis_data/image.tif"

dst_ds = driver.Create(output_file, 
                       col+1, 
                       row+1, 
                       1, 
                       gdal.GDT_Float32)

#writing output raster
dst_ds.GetRasterBand(1).WriteArray( image )

#setting extension of output raster
# top left x, w-e pixel resolution, rotation, top left y, rotation, n-s pixel resolution
dst_ds.SetGeoTransform(gt)

dst_ds.GetRasterBand(1).SetNoDataValue(0)

# setting spatial reference of output raster 
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326) 
dst_ds.SetProjection( srs.ExportToWkt() )

# Dropping the reference closes the dataset and flushes it to disk.
dst_ds = None

epsg = 4326

uri = "Point?crs=epsg:" + str(epsg) + "&field=id:integer&field=value:double""&index=yes"

# In-memory point layer holding the original points with their values.
mem_layer = QgsVectorLayer(uri,
                           'point',
                           'memory')

prov = mem_layer.dataProvider()

feats = [ QgsFeature() for i in range(len(points)) ]

for i, feat in enumerate(feats):
    feat.setAttributes([ i, float(df['value'][i]) ])
    feat.setGeometry(QgsGeometry.fromPoint(points[i]))

prov.addFeatures(feats)

QgsMapLayerRegistry.instance().addMapLayer(mem_layer)

After running the above code in the Python Console, you can see that the points and the image are shifted relative to each other. This is caused by the algorithm used to produce the image; you need to fix it.

However, you can also rasterize your points by value and get a result very quickly, as can be seen in the next image:

Apparently, no points are lost in that area, as the above image shows.

Best Answer

Related Solutions

[GIS] Rescale Raster Band With GDAL Python Bindings

[GIS] Converting coordinates to pixels with out losing points

Related Question