[GIS] Using Union on multiple shapefiles to output single result

pyqgis qgis-processing union

Using QGIS 2.2 and Python, I would like to use the Union algorithm on multiple shapefiles (contained in a folder TEST), which would then produce a single shapefile containing all the data.

Currently, the following code only outputs the union of the first and last files. How can I modify it so that the output of the first two layers is saved and then used as the input together with the next layer, and so on?

UPDATE: The shapefiles are now being processed by clip_function; however, I can't load the temp files when running the qgis:union algorithm. Also, an error occurs when it tries to output the Result.

##Model=name
##Select_folder=folder
##Cellsize=number 1000
##Result=output vector

import os
import glob
import tempfile
from qgis import utils
from qgis.core import QgsVectorLayer, QgsMapLayerRegistry, QgsVectorFileWriter, QgsCoordinateReferenceSystem

#   Create the grid used for clipping. Cellsize (the script parameter declared
#   in the header) is passed twice -- presumably horizontal and vertical
#   spacing; confirm against the qgis:creategrid parameter list. The trailing
#   None leaves the output location to Processing.
outputs_1 = processing.runalg(
    "qgis:creategrid",
    Cellsize,
    Cellsize,
    24108,
    18351.157175,
    258293.802316,
    665638.226408,
    1,
    'EPSG:7405',
    None,
)

def clip_function():
    """Clip the grid against every shapefile in the data folder and rank it.

    Changes the working directory to the hard-coded data folder, then for each
    ``*.shp`` found there runs ``qgis:clip`` with the grid held in the
    module-level ``outputs_1`` and feeds the clip output to
    ``qgis:fieldcalculator`` with a field named ``Rank_<file name>``.

    Nothing is returned: the algorithm results are only held in local
    variables, and both algorithms are called with ``None`` as output.
    """
    # Work from the data directory so the glob pattern can stay relative.
    os.chdir("C:\Users\TEST\DATA\\")
    for shp_name in glob.glob("*.shp"):
        # Clip step: grid output and current shapefile go to qgis:clip.
        clipped = processing.runalg("qgis:clip", outputs_1['SAVENAME'], shp_name, None)
        # Field calculator step on the clipped layer; the remaining
        # positional arguments are passed exactly as the script defines them.
        ranked = processing.runalg(
            "qgis:fieldcalculator",
            clipped['OUTPUT'],
            'Rank_' + shp_name,
            1,
            10,
            0,
            True,
            1,
            None,
        )

def binary_alg_wrapper(list_of_files,final_file_name):
    """Attempt to chain ``qgis:union`` over several shapefiles via temp files.

    Parameters:
        list_of_files: paths that are opened with ``QgsVectorLayer(..., "ogr")``.
        final_file_name: path passed as the union output in the ``else`` branch
            near the end of the function.

    Returns None. Two scratch shapefiles, ``temp_input_layer.shp`` and
    ``temp_output_layer.shp``, are used inside ``tempfile.gettempdir()``.

    NOTE(review): as indented, the ``for`` loop only copies the first file to
    the temp output. The ``if run_count>1`` block sits at function level, so it
    runs once after the loop with ``file_name`` bound to the last list entry,
    i.e. a single union of the first and the last file. The indentation
    suggests it was meant to be inside the loop -- confirm.
    """
    # Counts how many entries of list_of_files have been visited.
    run_count=0
    temp_path=tempfile.gettempdir()
    temp_input_name="temp_input_layer"
    temp_output_name="temp_output_layer"
    # Full paths of the two scratch shapefiles in the temp directory.
    temp_input_layer_filename=str(os.path.join(temp_path,temp_input_name+".shp"))
    temp_output_layer_filename=str(os.path.join(temp_path,temp_output_name+".shp"))
    for file_name in list_of_files:
        # Every layer is written with this hard-coded EPSG code.
        EPSG_Code_for_file=4326
        run_count+=1
        if run_count==1:
            #Copy the first file into a temp location
            temp_output_layer = QgsVectorLayer(file_name, "temp_output_layer", "ogr")
            Add_result = QgsMapLayerRegistry.instance().addMapLayer(temp_output_layer)
            write_error = QgsVectorFileWriter.writeAsVectorFormat(temp_output_layer,temp_output_layer_filename,"system",QgsCoordinateReferenceSystem(EPSG_Code_for_file), "ESRI Shapefile")
        # NOTE(review): this removal runs on every iteration, although the layer
        # is only created and added during the first one.
        QgsMapLayerRegistry.instance().removeMapLayer(temp_output_layer.id())
    # NOTE(review): outside the loop -- executed at most once, after all files
    # have been iterated; file_name still holds the last list entry here.
    if run_count>1:
        #Copy the result of the last algorithm into a temp location
        old_output_layer = QgsVectorLayer(temp_output_layer_filename, "input_layer",     "ogr")
        Add_result=QgsMapLayerRegistry.instance().addMapLayer(old_output_layer)
        write_error = QgsVectorFileWriter.writeAsVectorFormat(old_output_layer,temp_input_layer_filename, "system", QgsCoordinateReferenceSystem(EPSG_Code_for_file), "ESRI Shapefile")
        QgsMapLayerRegistry.instance().removeMapLayer(old_output_layer.id())

        #Read in a new file and the file stored in the temp location
        input1_layer = QgsVectorLayer(temp_input_layer_filename, "input1_layer", "ogr")
        input2_layer = QgsVectorLayer(file_name, "input2_layer", "ogr")

        #Run the binary algorithm and store the result into a temp location or output location
        # NOTE(review): the last run writes to the temp file and every other run
        # to final_file_name, which looks inverted. Because this block only
        # runs after the loop, run_count equals len(list_of_files) for a list,
        # so final_file_name is never written -- confirm intent.
        if run_count==len(list_of_files):
            processing.runalg("qgis:union", input1_layer, input2_layer, temp_output_layer_filename)
        else:
            processing.runalg("qgis:union", input1_layer, input2_layer, final_file_name)   

        # These two layers were never added to the registry in this function.
        QgsMapLayerRegistry.instance().removeMapLayer(input1_layer.id())
        QgsMapLayerRegistry.instance().removeMapLayer(input2_layer.id())


# Look for shapefiles in, and write the result to, the system temp directory.
path_res = tempfile.gettempdir()
# NOTE(review): the pattern is appended to path_res without a path separator,
# so unless gettempdir() returns a trailing separator it matches nothing
# inside that folder. The list is also built before clip_function() runs.
file_list = glob.glob(path_res + "*.shp")
Result = os.path.join(path_res, "result.shp")

# Run the two steps only when the Select_folder script parameter is set.
if Select_folder:
    clip_function()
    binary_alg_wrapper(file_list, Result)

Best Answer

In order to carry out binary shapefile operations, such as the one you intend to use, on more than two files, you can do the process in a stepwise fashion using temporary files.

A TESTED piece of code is below:

import os
import tempfile
import shutil
import processing
import glob

def delete_shape(shape_file):
    """Delete a shapefile together with its sidecar files.

    ``shape_file`` is the path of any member of the set (normally the
    ``.shp``). Its extension is stripped and the ``shx``, ``dbf``, ``qpj``,
    ``prj``, ``shp`` and ``cpg`` files sharing that base name are removed.

    The deletion is best effort: members that do not exist or cannot be
    removed are skipped silently. Returns None.
    """
    base_name = os.path.splitext(shape_file)[0]
    for extension in ('shx', 'dbf', 'qpj', 'prj', 'shp', 'cpg'):
        try:
            os.remove(base_name + '.' + extension)
        except OSError:
            # Usually the member simply does not exist. Only file-system
            # errors are ignored, so interrupts and real bugs still surface.
            pass

def copy_shape(shape_file,destination_shape_file):
    """Copy a shapefile and its sidecar files to a new base name.

    Any existing shapefile at ``destination_shape_file`` is deleted first.
    Then each of the ``shx``, ``dbf``, ``qpj``, ``prj``, ``shp`` and ``cpg``
    members of ``shape_file`` is copied to the destination base name. Members
    that are missing or cannot be copied are skipped (best effort).

    If source and destination are the same shapefile nothing is done, because
    clearing the destination first would otherwise delete the source.
    Returns None.
    """
    source_base = os.path.splitext(shape_file)[0]
    destination_base = os.path.splitext(destination_shape_file)[0]

    def _canonical(path):
        # Absolute, case-normalised form so equal paths compare equal.
        return os.path.normcase(os.path.abspath(path))

    if _canonical(source_base) == _canonical(destination_base):
        return

    delete_shape(destination_shape_file)
    for extension in ('shx', 'dbf', 'qpj', 'prj', 'shp', 'cpg'):
        try:
            shutil.copyfile(source_base + '.' + extension,
                            destination_base + '.' + extension)
        except (IOError, OSError, shutil.Error):
            # Optional members (e.g. qpj, cpg) are often absent. Only
            # copy and file-system errors are ignored.
            pass

def wrapped_alg(algorithm_name,file_name_list, output_file):
    """Apply a two-input Processing algorithm across a list of shapefiles.

    Every input is first copied into the system temp directory, so the
    originals are never modified. The last two temp copies are then combined
    repeatedly with ``processing.runalg(algorithm_name, a, b, out)`` until one
    shapefile remains, and that one is copied to ``output_file``.

    Parameters:
        algorithm_name: Processing algorithm id, e.g. ``"qgis:union"``.
        file_name_list: paths of the input shapefiles. Temp copies are named
            by base name only, so inputs sharing a base name overwrite each
            other.
        output_file: path of the shapefile to create.

    Raises:
        ValueError: if ``file_name_list`` contains no files.
    """
    temp_dir = tempfile.gettempdir()

    #Copy files to temp directory
    temp_file_list = []
    for src_file in file_name_list:
        dst_file = os.path.join(temp_dir, os.path.basename(src_file))
        copy_shape(src_file, dst_file)
        temp_file_list.append(dst_file)

    if not temp_file_list:
        # Fail with a clear message instead of an IndexError from pop() below.
        raise ValueError("file_name_list must contain at least one shapefile")

    #binary process on files in temp directory
    temp_file = os.path.join(temp_dir, 'multi_merge_temp_file.shp')
    while len(temp_file_list) > 1:
        ultimate_file = temp_file_list.pop()
        penultimate_file = temp_file_list.pop()
        processing.runalg(algorithm_name, ultimate_file, penultimate_file, temp_file)
        # The intermediate result takes over the penultimate slot;
        # copy_shape clears that destination itself before copying.
        copy_shape(temp_file, penultimate_file)
        delete_shape(temp_file)
        delete_shape(ultimate_file)
        temp_file_list.append(penultimate_file)

    #copy over final file
    final_temp_file = temp_file_list.pop()
    copy_shape(final_temp_file, output_file)

    # Remove the last temp copy, unless it is the requested output itself.
    final_base = os.path.splitext(final_temp_file)[0]
    output_base = os.path.splitext(output_file)[0]
    if (os.path.normcase(os.path.abspath(final_base))
            != os.path.normcase(os.path.abspath(output_base))):
        delete_shape(final_temp_file)

# Shapefile to create, and the folder holding the inputs. The folder keeps its
# trailing separator because the glob pattern is appended to it directly.
output_file = "C:\\test\\result\\output.shp"
dir_to_process = "C:\\test\\files\\"
file_name_list = glob.glob(dir_to_process + "*.shp")

# Any two-input algorithm id can be used here, for example "qgis:union".
alg_name = "qgis:mergevectorlayers"
wrapped_alg(alg_name, file_name_list, output_file)

NB1: This code can be used for any 'binary' processing algorithm normally run in the form

processing.runalg("algorithm_name", file1, file2, outputfile)

NB2: As written, it only works on shapefiles.

NB3: Ensure all your shapefiles are in the same projection.

NB4: This code is now tested on "qgis:union" and "qgis:merge".

NB5: This code is for algorithms which can run on files (additional code would be required if the inputs HAD to be loaded layers).

NB6: I do realise that for merge the following would be much more efficient, but I wanted to supply a generic solution:

# Single-call merge: the first shapefile, then the remaining ones joined with
# ";", written to output_file (the name used by the script above).
processing.runalg("saga:mergeshapeslayers", file_name_list[0], ";".join(file_name_list[1:]), output_file)
Related Question