Google Earth Engine & Python – Iterating Over ImageCollection to Return Pandas DataFrame

Tags: google-earth-engine, iteration, pandas, python

I'm trying to extract values from an ImageCollection at a series of points. My goal is to create a pandas dataframe that includes the values from each image and band of the ImageCollection for each point.

I'm using Google Earth Engine and Python in a Docker container, run on my local machine (using the set-up described here):
https://developers.google.com/earth-engine/python_install-datalab-local

My approach is to:

  1. Create a FeatureCollection with a few points

  2. Import ImageCollection, bound by the above points

  3. Create an initial pandas dataframe

  4. Create a function (extract_point_values) that takes an image and a pandas dataframe as inputs. The image's values are extracted at the FeatureCollection's points and put into a pandas dataframe, which is then appended to the dataframe that was passed into the function.

The issue is in the "extract_point_values" function and how I'm iterating over the imageCollection. Is it possible to return a pandas dataframe using the iterate function on an imageCollection? If so, any ideas as to what might be going wrong?

%%bash
# Refresh the package index before installing anything.
apt-get update -y

# System packages, installed before the pip packages below.
# The list is continued with a backslash so that "zlib1g-dev" stays one
# package name instead of being split across two lines.
apt-get install -y -q python-dev python-pip libxml2-dev libxslt1-dev \
    zlib1g-dev libffi-dev libssl-dev

pip install geopandas
pip install shapely

.

import ee, datetime
import pandas as pd
import geopandas as gpd
import matplotlib.dates as mdates
from IPython.display import Image
from matplotlib import dates
from shapely.geometry import shape
import skimage
ee.Initialize()
%matplotlib inline

# ==========================================================================
# Function to Convert Feature Classes to Pandas Dataframe
# Adapted from: https://events.hpc.grnet.gr/event/47/material/1/12.py
def fc2df(fc):
    """Convert an Earth Engine FeatureCollection into a GeoDataFrame.

    Args:
        fc: Object whose ``getInfo()`` returns a dict with a ``'features'``
            list, each feature carrying ``'properties'`` and ``'geometry'``
            (GeoJSON) entries -- e.g. an ``ee.FeatureCollection``.

    Returns:
        A ``geopandas.GeoDataFrame`` with one row per feature: one column
        per property plus a ``geometry`` column of shapely geometries.
        A collection without features gives an empty frame.
    """
    # Features is a list of dict with the output
    features = fc.getInfo()['features']

    rows = []
    for f in features:
        # Copy the attributes so the fetched feature dict is not modified.
        attr = dict(f['properties'])
        # Convert the GeoJSON geometry to a shapely shape up front. Assigning
        # map(...) to the column afterwards hands pandas a lazy iterator on
        # Python 3, and reading df.geometry fails when there are no rows.
        attr['geometry'] = shape(f['geometry'])
        rows.append(attr)

    return gpd.GeoDataFrame(rows)

# ==========================================================================
# Function to iterate over image collection, returning a pandas dataframe
# THIS FUNCTION ISN'T WORKING
def extract_point_values(image, df):
    """Reduce ``image`` over the global ``points`` and add the rows to ``df``.

    NOTE: ``fc2df`` and ``image.getInfo()`` both request concrete values, so
    this function needs a concrete ``ee.Image``. It cannot be used as the
    algorithm of ``ImageCollection.iterate``: the traceback shows iterate()
    calling it once with placeholder arguments while serializing the
    algorithm, and the ``getInfo()`` inside ``fc2df`` is then sent with an
    unresolved ``ArgumentRef``.

    Args:
        image: The ``ee.Image`` to sample.
        df: DataFrame holding the rows collected so far.

    Returns:
        A new DataFrame containing the rows of ``df`` followed by one row per
        point for ``image``, with the image's ``DATE_ACQUIRED`` property in a
        ``date`` column.
    """
    # Extract values of rasters to points
    image_red = image.reduceRegions(collection=points,
                                    reducer=ee.Reducer.mean(),
                                    scale=30)

    # Convert output to pandas data frame
    image_red_pd = fc2df(image_red)

    # Add date variable to data frame
    image_red_pd['date'] = image.getInfo()['properties']['DATE_ACQUIRED']

    # DataFrame.append was removed in pandas 2.0; concat gives the same result.
    return pd.concat([df, image_red_pd])

# ==========================================================================
#### Make Points
points = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(14.742607, -17.494993)),
    ee.Feature(ee.Geometry.Point(14.715903, -17.450650)),
])

#### Load Raster
# Filter the collection by date range and by the points defined above.
# (The end date must be one unbroken string literal.)
l8 = (ee.ImageCollection('LANDSAT/LC8_L1T')
      .filterDate('2015-01-01', '2015-12-31')
      .filterBounds(points))

#### Create Initial Pandas Dataframe
l8_singleImage = ee.Image('LANDSAT/LC8_L1T/LC80440342014077LGN00')

l8_singleImage_red = l8_singleImage.reduceRegions(collection=points,
                                                  reducer=ee.Reducer.mean(),
                                                  scale=30)

l8_singleImage_red_pd = fc2df(l8_singleImage_red)

# The ['DATE_ACQUIRED'] subscript has to stay on the same expression;
# on a line of its own the whole properties dict would be assigned instead.
l8_singleImage_red_pd['date'] = (
    l8_singleImage.getInfo()['properties']['DATE_ACQUIRED'])

# Drop the rows labelled 0 and 1 so only the column layout is left as the
# seed frame.
l8_singleImage_red_pd = l8_singleImage_red_pd.drop([0, 1])

#### Iterate over image collection
# This is the call that raises the EEException in the traceback below:
# extract_point_values reaches fc.getInfo() while iterate() is still
# serializing the function.
pd_all = l8.iterate(extract_point_values, l8_singleImage_red_pd)

Here's the error that's returned:

    EEExceptionTraceback (most recent call last)
<ipython-input-70-874bf6d3dd39> in <module>()
     66 
     67 #### Iterate over image collection
---> 68 pd_all = l8.iterate(extract_point_values, l8_singleImage_red_pd)

/src/earthengine-api/python/ee/collection.pyc in iterate(self, algorithm, first)
    223     with_cast = lambda e, prev: algorithm(element_type(e), prev)
    224     return apifunction.ApiFunction.call_(
--> 225         'Collection.iterate', self, with_cast, first)

/src/earthengine-api/python/ee/apifunction.pyc in call_(cls, name, *args, 
**kwargs)
     79       a recognized return type, the returned value will be cast to 
that type.
     80     """
---> 81     return cls.lookup(name).call(*args, **kwargs)
     82 
     83   @classmethod

/src/earthengine-api/python/ee/function.pyc in call(self, *args, **kwargs)
     65       to that type.
     66     """
---> 67     return self.apply(self.nameArgs(args, kwargs))
     68 
     69   def apply(self, named_args):

/src/earthengine-api/python/ee/function.pyc in apply(self, named_args)
     78       to that type.
     79     """
---> 80     result = computedobject.ComputedObject(self, 
self.promoteArgs(named_args))
     81     return Function._promoter(result, self.getReturnType())
     82 

    /src/earthengine-api/python/ee/function.pyc in promoteArgs(self, args)
    105       name = spec['name']
    106       if name in args:
--> 107         promoted_args[name] = Function._promoter(args[name], spec['type'])
    108       elif not spec.get('optional'):
    109         raise ee_exception.EEException(

/src/earthengine-api/python/ee/__init__.pyc in _Promote(arg, klass)
    208       # A native function that needs to be wrapped.
    209       args_count = len(inspect.getargspec(arg).args)
--> 210       return CustomFunction.create(arg, 'Object', ['Object'] * args_count)
    211     elif isinstance(arg, Encodable):
    212       # An ee.Function or a computed function like the return value of

/src/earthengine-api/python/ee/customfunction.pyc in create(func, return_type, 
arg_types)
     99         'args': args
    100     }
--> 101     return CustomFunction(signature, func)
    102 
    103   @staticmethod

/src/earthengine-api/python/ee/customfunction.pyc in __init__(self, signature, 
body)
     36     # The signature of the function.
     37     self._signature = CustomFunction._resolveNamelessArgs(
---> 38         signature, variables, body)
     39 
     40     # The expression to evaluate.

/src/earthengine-api/python/ee/customfunction.pyc in 
_resolveNamelessArgs(signature, variables, body)
    143           count += CountFunctions(sub_expression)
    144       return count
--> 145     serialized_body = serializer.encode(body(*variables))
    146     base_name = '_MAPPING_VAR_%d_' % CountFunctions(serialized_body)
    147 

/src/earthengine-api/python/ee/collection.pyc in <lambda>(e, prev)
    221     """
    222     element_type = self.elementType()
--> 223     with_cast = lambda e, prev: algorithm(element_type(e), prev)
    224     return apifunction.ApiFunction.call_(
    225         'Collection.iterate', self, with_cast, first)

<ipython-input-70-874bf6d3dd39> in extract_point_values(image, df)
     33 
     34   # Convert output to pandas data frame
---> 35   image_red_pd = fc2df(image_red)
     36 
     37   # Add date variable to data frame

<ipython-input-70-874bf6d3dd39> in fc2df(fc)
      5     # Convert a FeatureCollection into a pandas DataFrame
      6     # Features is a list of dict with the output
----> 7     features = fc.getInfo()['features']
      8 
      9     dictarr = []

/src/earthengine-api/python/ee/collection.pyc in getInfo(self)
    125            properties.
    126     """
--> 127     return super(Collection, self).getInfo()
    128 
    129   def limit(self, maximum, opt_property=None, opt_ascending=None):

/src/earthengine-api/python/ee/computedobject.pyc in getInfo(self)
     93       The object can evaluate to anything.
     94     """
---> 95     return data.getValue({'json': self.serialize()})
     96 
     97   def encode(self, encoder):

/src/earthengine-api/python/ee/data.pyc in getValue(params)
    253   """
    254   params['json_format'] = 'v2'
--> 255   return send_('/value', params)
    256 
    257 

/src/earthengine-api/python/ee/data.pyc in send_(path, params, opt_method, 
opt_raw)
    795       raise ee_exception.EEException('Invalid JSON: %s' % content)
    796     if 'error' in json_content:
--> 797       raise ee_exception.EEException(json_content['error']['message'])
    798     if 'data' not in content:
    799       raise ee_exception.EEException('Malformed response: ' + 
str(content))

EEException: Failed to decode JSON.
Error: Field 'value' of object '{"type":"ArgumentRef","value":null}' is 
missing or 
null.
Object: {"type":"ArgumentRef","value":null}.

Best Answer

I found a solution: make a list of the scene IDs from the ImageCollection and iterate over that list, importing each image individually inside the loop instead of mapping/iterating over the ImageCollection. There is probably a more efficient way to do this, but it gets the job done.

# ==========================================================================
# Function to Convert Feature Classes to Pandas Dataframe
# Adapted from: https://events.hpc.grnet.gr/event/47/material/1/12.py
def fc2df(fc):
    """Convert an Earth Engine FeatureCollection into a GeoDataFrame.

    Args:
        fc: Object whose ``getInfo()`` returns a dict with a ``'features'``
            list, each feature carrying ``'properties'`` and ``'geometry'``
            (GeoJSON) entries -- e.g. an ``ee.FeatureCollection``.

    Returns:
        A ``geopandas.GeoDataFrame`` with one row per feature: one column
        per property plus a ``geometry`` column of shapely geometries.
        A collection without features gives an empty frame.
    """
    # Features is a list of dict with the output
    features = fc.getInfo()['features']

    rows = []
    for f in features:
        # Copy the attributes so the fetched feature dict is not modified.
        attr = dict(f['properties'])
        # Convert the GeoJSON geometry to a shapely shape up front. Assigning
        # map(...) to the column afterwards hands pandas a lazy iterator on
        # Python 3, and reading df.geometry fails when there are no rows.
        attr['geometry'] = shape(f['geometry'])
        rows.append(attr)

    return gpd.GeoDataFrame(rows)

# ==========================================================================
# Function to extract point values from a single image, returning a pandas dataframe
def extract_point_values(img_id, pts):
    """Return the per-point mean values of one image as a dataframe.

    ``img_id`` is passed to ``ee.Image`` and ``pts`` is the collection given
    to ``reduceRegions``. The frame produced by ``fc2df`` is returned with an
    extra ``date`` column holding the image's ``DATE_ACQUIRED`` property.
    """
    scene = ee.Image(img_id)

    # Mean reducer over pts with scale=30, converted straight to a dataframe.
    reduce_kwargs = dict(collection=pts, reducer=ee.Reducer.mean(), scale=30)
    frame = fc2df(scene.reduceRegions(**reduce_kwargs))

    # Tag every row with the acquisition date from the image metadata.
    scene_properties = scene.getInfo()['properties']
    frame['date'] = scene_properties['DATE_ACQUIRED']
    return frame

# ==========================================================================
#### Make Points
points = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(14.742607, -17.494993)),
    ee.Feature(ee.Geometry.Point(14.715903, -17.450650)),
])

#### Load Raster
# Filter the collection by date range and by the points defined above.
# (The end date must be one unbroken string literal.)
l8 = (ee.ImageCollection('LANDSAT/LC8_L1T')
      .filterDate('2015-01-01', '2015-12-31')
      .filterBounds(points))

#### Make list of image IDs
# str() keeps the concatenation valid on Python 2 and Python 3 alike;
# .encode() yields bytes on Python 3, which cannot be added to a str.
l8_id = [
    'LANDSAT/LC8_L1T/' + str(f['properties']['LANDSAT_SCENE_ID'])
    for f in l8.getInfo()['features']
]

#### Extract values for every image
frames = [extract_point_values(i, points) for i in l8_id]

#### Combine into a single dataframe
# One concat replaces the empty seed frame plus repeated append():
# DataFrame.append was removed in pandas 2.0, and building the seed cost an
# extra extract_point_values call. An empty collection gives an empty frame
# rather than an IndexError.
df_all = pd.concat(frames) if frames else pd.DataFrame()

#### Display Results
df_all