OGR – Creating an ESRI Shapefile from WKT Using GDAL in Python

gdal, ogr, python 3, well-known-text

I want to create an ESRI Shapefile layer from a CSV file. My CSV has several attributes (text, numeric) and a geometry column in WKT format. My problem is that the geometry ends up in a string field truncated to 254 characters, because I don't know how to declare it as a geometry.

How can I create ESRI Shapefile geometries from WKT with OGR?
Isn't ogr.CreateGeometryFromWkt() the right way to do it?

Or should I use string formatting operators?

CSV source file:

serialnumber;name;objectnumber;city;geom
034002002;Peter;1;34086;POLYGON ((676923.736 6265215.412,676954.294 ... )) 
034002003;John;2;34284;POLYGON ((678870.753 6264475.605,678867.203 ... )) 
034002004;Steeve;3;34086;POLYGON ((678001.823 6264195.123,677975.609 ... ))

I found this method, which explains how to parse a delimited CSV file to create an ESRI Shapefile. I'm using Python 3.7.4 and GDAL 2.3.3.

Python script:

# -*- coding: utf-8 -*-    
import osgeo.ogr as ogr
import osgeo.osr as osr
import csv
import os
# Point GDAL_DATA and PROJ_LIB at directories under the active conda
# environment. Both paths are built from CONDA_PREFIX, so this raises
# KeyError when that variable is not set.
os.environ['GDAL_DATA'] = os.environ['CONDA_PREFIX'] + r'\Library\share\gdal'
os.environ['PROJ_LIB'] = os.environ['CONDA_PREFIX'] + r'\Library\share'

# Semicolon-delimited CSV whose header row names the columns used below
myInput = 'input.csv'

with open(myInput, 'r', encoding='utf-8') as csv_file:
    # DictReader yields one dict per data row, keyed by the header names
    reader = csv.DictReader(csv_file, delimiter=';', quotechar='"')

    # set up the shapefile driver
    driver = ogr.GetDriverByName("ESRI Shapefile")

    # create the data source
    data_source = driver.CreateDataSource("output.shp")

    # create the spatial reference from EPSG code 2154
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(2154)

    # create the polygon layer
    layer = data_source.CreateLayer("output", srs, ogr.wkbPolygon)

    # Add the attribute fields we're interested in
    field_serialnumber = ogr.FieldDefn("serialnumber", ogr.OFTString)
    field_serialnumber.SetWidth(9)
    layer.CreateField(field_serialnumber)
    field_name = ogr.FieldDefn("name", ogr.OFTString)
    field_name.SetWidth(50)
    layer.CreateField(field_name)
    layer.CreateField(ogr.FieldDefn("objectnumber", ogr.OFTInteger))
    field_city = ogr.FieldDefn("city", ogr.OFTString)
    field_city.SetWidth(5)
    layer.CreateField(field_city)
    # "geom" is declared as an ordinary attribute field with no explicit type;
    # this is the field the question reports as a string truncated to
    # 254 characters. The actual geometry is set separately further down.
    layer.CreateField(ogr.FieldDefn("geom"))

    # Process the text file and add the attributes and features to the shapefile
    for row in reader:
        # create the feature
        feature = ogr.Feature(layer.GetLayerDefn())
        # Set the attributes using the values from the delimited text file
        feature.SetField("serialnumber", row['serialnumber'])
        feature.SetField("name", row['name'])
        feature.SetField("objectnumber", row['objectnumber'])
        feature.SetField("city", row['city'])
        # copies the raw WKT text into the "geom" attribute field
        feature.SetField("geom", row['geom'])

        # build an OGR geometry from the WKT text in the 'geom' column
        wkt = (row['geom'])
        polygon = ogr.CreateGeometryFromWkt(wkt)

        # Set the feature geometry using the parsed polygon
        feature.SetGeometry(polygon)

        # Create the feature in the layer (shapefile)
        layer.CreateFeature(feature)

        # Dereference the feature
        feature = None

        # Save and close the data source
        # NOTE(review): this assignment is indented inside the for loop, so the
        # data source reference is dropped at the end of the first iteration
        # while later iterations still use `layer`.
        data_source = None

PS: QGIS is able to convert my CSV file to a layer when I import it as a delimited text layer, so I'm sure the data is valid.

Best Answer

I've got it! It was an indentation problem with the last instruction, which saves and closes the data source: it must be outside the for loop. I also removed the geom field from the ESRI Shapefile, since I don't need it. A fuller answer for @Ian Turton: I'm using DictReader to access my fields by name, which is easier than using indexes (if I understand correctly).

Here is the corrected script:

# -*- coding: utf-8 -*-    
import osgeo.ogr as ogr
import osgeo.osr as osr
import csv
import os
# Point GDAL_DATA and PROJ_LIB at directories under the active conda
# environment (Windows layout). Both paths are built from CONDA_PREFIX, so
# this raises KeyError when the script is run outside a conda environment.
os.environ['GDAL_DATA'] = os.environ['CONDA_PREFIX'] + r'\Library\share\gdal'
os.environ['PROJ_LIB'] = os.environ['CONDA_PREFIX'] + r'\Library\share'

# Semicolon-delimited CSV with the columns
# serialnumber;name;objectnumber;city;geom (geom holds polygon WKT).
myInput = 'input.csv'

# newline='' lets the csv module do its own line-ending handling, as the
# csv documentation recommends for files passed to a reader.
with open(myInput, 'r', encoding='utf-8', newline='') as csv_file:
    reader = csv.DictReader(csv_file, delimiter=';', quotechar='"')

    # Set up the shapefile driver. Without ogr.UseExceptions(), OGR signals
    # failure through None / non-zero return values, so check them explicitly.
    driver = ogr.GetDriverByName("ESRI Shapefile")
    if driver is None:
        raise RuntimeError("OGR driver 'ESRI Shapefile' is not available")

    # Create the data source
    data_source = driver.CreateDataSource("output.shp")
    if data_source is None:
        raise RuntimeError("Could not create data source 'output.shp'")

    # Create the spatial reference from its EPSG code
    srs = osr.SpatialReference()
    if srs.ImportFromEPSG(2154) != 0:
        raise RuntimeError("Could not import EPSG:2154 (check GDAL_DATA / PROJ_LIB)")

    # Create the polygon layer
    layer = data_source.CreateLayer("output", srs, ogr.wkbPolygon)
    if layer is None:
        raise RuntimeError("Could not create layer 'output'")

    # Add the attribute fields we're interested in. The WKT column is not
    # stored as an attribute: it becomes the feature geometry instead.
    field_serialnumber = ogr.FieldDefn("serialnumber", ogr.OFTString)
    field_serialnumber.SetWidth(9)
    layer.CreateField(field_serialnumber)
    field_name = ogr.FieldDefn("name", ogr.OFTString)
    field_name.SetWidth(50)
    layer.CreateField(field_name)
    layer.CreateField(ogr.FieldDefn("objectnumber", ogr.OFTInteger))
    field_city = ogr.FieldDefn("city", ogr.OFTString)
    field_city.SetWidth(5)
    layer.CreateField(field_city)

    layer_defn = layer.GetLayerDefn()
    attribute_columns = ("serialnumber", "name", "objectnumber", "city")

    # Process the text file and add the attributes and features to the shapefile
    for row in reader:
        # Parse the WKT text into an OGR geometry. Fail loudly on a missing
        # or unparsable value rather than writing a feature with no geometry.
        wkt = row['geom']
        polygon = ogr.CreateGeometryFromWkt(wkt) if wkt else None
        if polygon is None:
            raise ValueError(
                f"Invalid or missing WKT in '{myInput}' near line "
                f"{reader.line_num} (serialnumber={row.get('serialnumber')!r})"
            )

        # Create the feature and copy the attributes from the CSV row
        feature = ogr.Feature(layer_defn)
        for column in attribute_columns:
            feature.SetField(column, row[column])

        # Set the feature geometry using the parsed polygon
        feature.SetGeometry(polygon)

        # Create the feature in the layer (shapefile)
        if layer.CreateFeature(feature) != 0:
            raise RuntimeError(
                f"Could not write feature for CSV line {reader.line_num}"
            )

        # Dereference the feature
        feature = None

    # Save and close the data source. This must stay outside the for loop:
    # releasing it after the first row would close the shapefile early.
    data_source = None