SpatiaLite GeoPandas – Insert GeoPandas GeoDataFrame into SpatiaLite Database

geopandasspatialite

Is it possible to insert a Geopandas GeoDataFrame directly into a Spatialite database with Python so that the geometry column of the GeoDataFrame becomes a proper geometry table in the database which can be used for spatial queries?

I tried to do it the following way. The shapefile I used for creating an example GeoDataFrame can be downloaded from granvillegis.org (AddressPoints.zip) but any other shapefile should also be fine.

# import geopandas
import geopandas as gpd

# import sqlite/spatialite
from sqlalchemy import create_engine, event
from sqlite3 import dbapi2 as sqlite

# read shapefile into GeoDataFrame
print('reading shapefile')
gdf = gpd.GeoDataFrame.from_file('TM_WORLD_BORDERS_SIMPL-0.3.shp')

# create database engine with new sqlite database
engine = create_engine('sqlite:///TestDB.sqlite', module=sqlite)

# load spatialite extension for sqlite
@event.listens_for(engine, 'connect')
def connect(dbapi_connection, connection_rec):
    dbapi_connection.enable_load_extension(True)
    dbapi_connection.execute('SELECT load_extension("mod_spatialite.dll")')

# create spatialite metadata
print('creating spatial metadata...')
engine.execute("SELECT InitSpatialMetaData(1);")

# write geodataframe into spatialite database
print('writing into database...')
gdf.to_sql('WorldBorders', engine, if_exists='replace', index=False)

This gives:

Traceback (most recent call last):
  File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\base.py", line 1116, in _execute_context
  File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\default.py", line 439, in do_executemany
sqlite3.InterfaceError: Error binding parameter 2 - probably unsupported type.

The above exception was the direct cause of the following exception:

    Traceback (most recent call last):
      File "C:/xampp/htdocs/3rdPartyAnalysis-ProofOfConcept/test.py", line 27, in <module>
        gdf.to_sql('WorldBorders', engine, if_exists='replace', index=False)
      File "C:\Python34\lib\site-packages\pandas\core\generic.py", line 966, in to_sql
        dtype=dtype)
      File "C:\Python34\lib\site-packages\pandas\io\sql.py", line 538, in to_sql
        chunksize=chunksize, dtype=dtype)
      File "C:\Python34\lib\site-packages\pandas\io\sql.py", line 1172, in to_sql
        table.insert(chunksize)
      File "C:\Python34\lib\site-packages\pandas\io\sql.py", line 717, in insert
        self._execute_insert(conn, keys, chunk_iter)
      File "C:\Python34\lib\site-packages\pandas\io\sql.py", line 692, in _execute_insert
        conn.execute(self.insert_statement(), data)
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\base.py", line 914, in execute
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\sql\elements.py", line 323, in _execute_on_connection
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\base.py", line 1010, in _execute_clauseelement
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\base.py", line 1146, in _execute_context
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\base.py", line 1332, in _handle_dbapi_exception
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\util\compat.py", line 188, in raise_from_cause
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\util\compat.py", line 181, in reraise
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\base.py", line 1116, in _execute_context
      File "C:\Python34\lib\site-packages\sqlalchemy-1.0.0b4-py3.4.egg\sqlalchemy\engine\default.py", line 439, in do_executemany
    sqlalchemy.exc.InterfaceError: (sqlite3.InterfaceError) Error binding parameter 2 - probably unsupported type. [SQL: 'INSERT INTO "WorldBorders" ("ISO2", "NAME", geometry) VALUES (?, ?, ?)'] [parameters: (('AG', 'Antigua and Barbuda', <shapely.geometry.multipolygon.MultiPolygon object at 0x0000000006031D30>), ('DZ', 'Algeria', <shapely.geometry.polygon.Polygon object at 0x000000000722C630>), ('AZ', 'Azerbaijan', <shapely.geometry.multipolygon.MultiPolygon object at 0x000000000722C588>), ('AL', 'Albania', <shapely.geometry.polygon.Polygon object at 0x000000000722C7F0>), ('AM', 'Armenia', <shapely.geometry.polygon.Polygon object at 0x000000000722C978>), ('AO', 'Angola', <shapely.geometry.multipolygon.MultiPolygon object at 0x000000000722C898>), ('AS', 'American Samoa', <shapely.geometry.multipolygon.MultiPolygon object at 0x000000000722CA20>), ('AR', 'Argentina', <shapely.geometry.multipolygon.MultiPolygon object at 0x000000000722CB38>)  ... displaying 10 of 246 total bound parameter sets ...  ('GS', 'South Georgia South Sandwich Islands', <shapely.geometry.multipolygon.MultiPolygon object at 0x0000000007251C18>), ('TW', 'Taiwan', <shapely.geometry.multipolygon.MultiPolygon object at 0x0000000007251D68>))]

Apparently, the geometry column of the GeoDataFrame is not converted properly into a suitable Spatialite geometry format by the to_sql method of Geopandas.

Is there any other way to insert the GeoDataFrame correctly into the database, maybe by using some conversion to another format before inserting?

Best Answer

Okay, meanwhile I found out how this can be achieved. I don't know if this is the most efficient way but for my tables which are not huge it works fast enough.

The idea is that geopandas stores geometries as shapely geometry objects in each row of the geometry column of a GeoDataFrame (which is in fact just a GeoSeries) so that most of shapely's methods can be applied to it. Using the shapely.wkb.dumps method, each geometry object in the geometry column can be replaced with its well-known-binary representation (using the well-known-text representation given by shapely.wkt.dumps also worked, however, I did not try which one is faster).

Then, the whole GeoDataFrame including the geometry column as WKB is written into a new database table. That followed, a new Spatialite-compatible geometry column is added to the table (in Spatialite, geometry columns apparently can not be created together with a table but have to be added afterwards). This new column is now filled with Spatialite geometry objects by parsing the WKB representations from the former geopandas geometry column.

Last but not least, the former geometry column is dropped from the database table as it is not needed anymore since it has been replaced by the new one.

Find the full code below. Hopefully everything is explained adequately by the comments. However, I'll still appreciate if anybody knows a cleaner or faster solution.

import geopandas as gpd

# shapely method to convert geometry objects into their well-known-binary representation
import shapely.wkb

# sqlite/spatialite
from sqlalchemy import create_engine, event
from sqlite3 import dbapi2 as sqlite

# file operations
import os


def writeIntoDatabase():
    # read shapefile into GeoDataFrame
    print('reading shapefile')
    gdf = gpd.GeoDataFrame.from_file('AddressPoints.shp')

    # make sure that the database does not exist yet, otherwise it will be opened instead of overwritten which will 
    # cause errors in this example
    if os.path.exists('TestDB.sqlite'):
        os.remove('TestDB.sqlite')

    # create database engine and create sqlite database
    engine = create_engine('sqlite:///TestDB.sqlite', module=sqlite)

    # load spatialite extension for sqlite. make sure that mod_spatialite.dll is located in a folder that is in your 
    # system path
    @event.listens_for(engine, 'connect')
    def connect(dbapi_connection, connection_rec):
        dbapi_connection.enable_load_extension(True)
        dbapi_connection.execute('SELECT load_extension("mod_spatialite.dll")')

    # create spatialite metadata
    print('creating spatial metadata...')
    engine.execute("SELECT InitSpatialMetaData(1);")

    # convert all values from the geopandas geometry column into their well-known-binary representations
    gdf['geometry'] = gdf.apply(lambda x: shapely.wkb.dumps(x.geometry), axis=1)

    # write the geodataframe into the spatialite database, creating a new table 'AddressPoints' and replacing any 
    # existing of the same name
    print('writing into database...')
    gdf.to_sql('AddressPoints', engine, if_exists='replace', index=False)

    # add a Spatialite geometry column called 'geom' to the table, using ESPG 4326, data type POINT and 2 dimensions 
    # (x, y)
    engine.execute("SELECT AddGeometryColumn('AddressPoints', 'geom', 4326, 'POINT', 2);")

    # update the yet empty geom column by parsing the well-known-binary objects from the geometry column into 
    # Spatialite geometry objects
    engine.execute("UPDATE AddressPoints SET geom=GeomFromWKB(geometry, 4326);")

    # drop the geometry column from the GeoDataFrame (and all other columns but one to keep it concise, adapt this to
    #  your needs) which are not needed anymore. unfortunately, there is no DROP TABLE support in sqlite3, 
    # so a heavy workaround is needed via a temporary table.
    connection = engine.connect()
    with connection.begin() as trans:
        connection.execute("BEGIN TRANSACTION;")
        connection.execute("CREATE TABLE AddressPoints_backup(Add_Number, geom);")
        connection.execute("INSERT INTO AddressPoints_backup SELECT Add_Number, geom from AddressPoints;")
        connection.execute("DROP TABLE AddressPoints;")
        connection.execute("CREATE TABLE AddressPoints(Add_Number, geom);")
        connection.execute("INSERT INTO AddressPoints SELECT Add_Number, geom FROM AddressPoints_backup;")
        connection.execute("DROP TABLE AddressPoints_backup;")
        trans.commit()


# reading some spatial data from the database to see if it worked
def readFromDatabase():
    # create database engine and open existing sqlite database
    engine = create_engine('sqlite:///TestDB.sqlite', module=sqlite)

    # load spatialite extension for sqlite. make sure that mod_spatialite.dll is located in a folder that is in your 
    # system path
    @event.listens_for(engine, 'connect')
    def connect(dbapi_connection, connection_rec):
        dbapi_connection.enable_load_extension(True)
        dbapi_connection.execute('SELECT load_extension("mod_spatialite.dll")')

    # select X and Y coordinates from the POINT geometries in the database table
    x = engine.execute("SELECT X(geom) FROM AddressPoints;")
    y = engine.execute("SELECT Y(geom) FROM AddressPoints;")

    # print results
    xy = zip(x, y)
    for row in xy:
        print(row)

# start functions
writeIntoDatabase()
readFromDatabase()
Related Question