diff --git a/guides/linked/AI_platform_demo.ipynb b/guides/linked/AI_platform_demo.ipynb index 404887c86..698e24932 100644 --- a/guides/linked/AI_platform_demo.ipynb +++ b/guides/linked/AI_platform_demo.ipynb @@ -94,7 +94,7 @@ "# Import and initialize the Earth Engine library.\n", "import ee\n", "ee.Authenticate()\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] diff --git a/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb b/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb index e89ec3e78..56bfeb667 100644 --- a/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb +++ b/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb @@ -44,11 +44,11 @@ "id": "aV1xZ1CPi3Nw" }, "source": [ - "
\n", - "\n", - " Run in Google Colab\n", - "\n", - " View source on GitHub
" + "\u003ctable class=\"ee-notebook-buttons\" align=\"left\"\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb\"\u003e\n", + " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e Run in Google Colab\u003c/a\u003e\n", + "\u003c/td\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"https://github.com/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e View source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" ] }, { @@ -59,7 +59,7 @@ "source": [ "# Introduction\n", "\n", - "This is an Earth Engine <> TensorFlow demonstration notebook. This demonstrates a per-pixel neural network implemented in a way that allows the trained model to be hosted on [Google AI Platform](https://cloud.google.com/ai-platform) and used in Earth Engine for interactive prediction from an `ee.Model.fromAIPlatformPredictor`. See [this example notebook](http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/TF_demo1_keras.ipynb) for background on the dense model.\n", + "This is an Earth Engine \u003c\u003e TensorFlow demonstration notebook. This demonstrates a per-pixel neural network implemented in a way that allows the trained model to be hosted on [Google AI Platform](https://cloud.google.com/ai-platform) and used in Earth Engine for interactive prediction from an `ee.Model.fromAIPlatformPredictor`. See [this example notebook](http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/TF_demo1_keras.ipynb) for background on the dense model.\n", "\n", "**Running this demo may incur charges to your Google Cloud Account!**" ] @@ -132,7 +132,7 @@ "source": [ "import ee\n", "ee.Authenticate()\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -666,19 +666,19 @@ "\n", "folium.TileLayer(\n", " tiles=mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='median composite',\n", " ).add_to(map)\n", "folium.TileLayer(\n", " tiles=label_mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='predicted label',\n", ").add_to(map)\n", "folium.TileLayer(\n", " tiles=probability_mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='probability',\n", ").add_to(map)\n", @@ -689,4 +689,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/guides/linked/Earth_Engine_TensorFlow_logistic_regression.ipynb b/guides/linked/Earth_Engine_TensorFlow_logistic_regression.ipynb index db247f623..49bae2b5c 100644 --- a/guides/linked/Earth_Engine_TensorFlow_logistic_regression.ipynb +++ b/guides/linked/Earth_Engine_TensorFlow_logistic_regression.ipynb @@ -43,11 +43,11 @@ "id": "aV1xZ1CPi3Nw" }, "source": [ - "
\n", - "\n", - " Run in Google Colab\n", - "\n", - " View source on GitHub
" + "\u003ctable class=\"ee-notebook-buttons\" align=\"left\"\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_logistic_regression.ipynb\"\u003e\n", + " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e Run in Google Colab\u003c/a\u003e\n", + "\u003c/td\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"https://github.com/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_logistic_regression.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e View source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" ] }, { @@ -123,7 +123,7 @@ "source": [ "import ee\n", "ee.Authenticate()\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -724,25 +724,25 @@ "map = folium.Map(location=[-9.1, -62.3], zoom_start=11)\n", "folium.TileLayer(\n", " tiles=image_mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='image',\n", ").add_to(map)\n", "folium.TileLayer(\n", " tiles=probability_mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='probability',\n", ").add_to(map)\n", "folium.TileLayer(\n", " tiles=predicted_mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='predicted',\n", ").add_to(map)\n", "folium.TileLayer(\n", " tiles=reference_mapid['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " overlay=True,\n", " name='reference',\n", ").add_to(map)\n", @@ -753,4 +753,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/guides/linked/TF_demo1_keras.ipynb b/guides/linked/TF_demo1_keras.ipynb index 52adba86c..2c7c7de50 100644 --- a/guides/linked/TF_demo1_keras.ipynb +++ b/guides/linked/TF_demo1_keras.ipynb @@ -1 +1,1326 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"TF_demo1_keras.ipynb","provenance":[],"private_outputs":true,"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"id":"fSIfBsgi8dNK","colab_type":"code","colab":{}},"source":["#@title Copyright 2020 Google LLC. 
{ display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aV1xZ1CPi3Nw","colab_type":"text"},"source":["
\n","\n"," Run in Google Colab\n","\n"," View source on GitHub
"]},{"cell_type":"markdown","metadata":{"id":"AC8adBmw-5m3","colab_type":"text"},"source":["# Introduction\n","\n","This is an Earth Engine <> TensorFlow demonstration notebook. Specifically, this notebook shows:\n","\n","1. Exporting training/testing data from Earth Engine in TFRecord format.\n","2. Preparing the data for use in a TensorFlow model.\n","2. Training and validating a simple model (Keras `Sequential` neural network) in TensorFlow.\n","3. Making predictions on image data exported from Earth Engine in TFRecord format.\n","4. Ingesting classified image data to Earth Engine in TFRecord format.\n","\n","This is intended to demonstrate a complete i/o pipeline. For a workflow that uses a [Google AI Platform](https://cloud.google.com/ai-platform) hosted model making predictions interactively, see [this example notebook](http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb)."]},{"cell_type":"markdown","metadata":{"id":"KiTyR3FNlv-O","colab_type":"text"},"source":["# Setup software libraries\n","\n","Import software libraries and/or authenticate as necessary."]},{"cell_type":"markdown","metadata":{"id":"dEM3FP4YakJg","colab_type":"text"},"source":["## Authenticate to Colab and Cloud\n","\n","To read/write from a Google Cloud Storage bucket to which you have access, it's necessary to authenticate (as yourself). *This should be the same account you use to login to Earth Engine*. When you run the code below, it will display a link in the output to an authentication page in your browser. Follow the link to a page that will let you grant permission to the Cloud SDK to access your resources. Copy the code from the permissions page back into this notebook and press return to complete the process.\n","\n","(You may need to run this again if you get a credentials error later.)"]},{"cell_type":"code","metadata":{"id":"sYyTIPLsvMWl","colab_type":"code","cellView":"code","colab":{}},"source":["from google.colab import auth\n","auth.authenticate_user()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Ejxa1MQjEGv9","colab_type":"text"},"source":["## Authenticate to Earth Engine\n","\n","Authenticate to Earth Engine the same way you did to the Colab notebook. Specifically, run the code to display a link to a permissions page. This gives you access to your Earth Engine account. *This should be the same account you used to login to Cloud previously*. Copy the code from the Earth Engine permissions page back into the notebook and press return to complete the process."]},{"cell_type":"code","metadata":{"id":"HzwiVqbcmJIX","colab_type":"code","cellView":"code","colab":{}},"source":["import ee\n","ee.Authenticate()\n","ee.Initialize()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"iJ70EsoWND_0","colab_type":"text"},"source":["## Test the TensorFlow installation\n","\n","Import the TensorFlow library and check the version."]},{"cell_type":"code","metadata":{"id":"i1PrYRLaVw_g","colab_type":"code","cellView":"code","colab":{}},"source":["import tensorflow as tf\n","print(tf.__version__)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"b8Xcvjp6cLOL","colab_type":"text"},"source":["## Test the Folium installation\n","\n","We will use the Folium library for visualization. 
Import the library and check the version."]},{"cell_type":"code","metadata":{"id":"YiVgOXzBZJSn","colab_type":"code","colab":{}},"source":["import folium\n","print(folium.__version__)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"DrXLkJC2QJdP","colab_type":"text"},"source":["# Define variables\n","\n","This set of global variables will be used throughout. For this demo, you must have a Cloud Storage bucket into which you can write files. ([learn more about creating Cloud Storage buckets](https://cloud.google.com/storage/docs/creating-buckets)). You'll also need to specify your Earth Engine username, i.e. `users/USER_NAME` on the [Code Editor](https://code.earthengine.google.com/) Assets tab."]},{"cell_type":"code","metadata":{"id":"GHTOc5YLQZ5B","colab_type":"code","colab":{}},"source":["# Your Earth Engine username. This is used to import a classified image\n","# into your Earth Engine assets folder.\n","USER_NAME = 'username'\n","\n","# Cloud Storage bucket into which training, testing and prediction \n","# datasets will be written. You must be able to write into this bucket.\n","OUTPUT_BUCKET = 'your-bucket'\n","\n","# Use Landsat 8 surface reflectance data for predictors.\n","L8SR = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\n","# Use these bands for prediction.\n","BANDS = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']\n","\n","# This is a training/testing dataset of points with known land cover labels.\n","LABEL_DATA = ee.FeatureCollection('projects/google/demo_landcover_labels')\n","# The labels, consecutive integer indices starting from zero, are stored in\n","# this property, set on each point.\n","LABEL = 'landcover'\n","# Number of label values, i.e. number of classes in the classification.\n","N_CLASSES = 3\n","\n","# These names are used to specify properties in the export of\n","# training/testing data and to define the mapping between names and data\n","# when reading into TensorFlow datasets.\n","FEATURE_NAMES = list(BANDS)\n","FEATURE_NAMES.append(LABEL)\n","\n","# File names for the training and testing datasets. These TFRecord files\n","# will be exported from Earth Engine into the Cloud Storage bucket.\n","TRAIN_FILE_PREFIX = 'Training_demo'\n","TEST_FILE_PREFIX = 'Testing_demo'\n","file_extension = '.tfrecord.gz'\n","TRAIN_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TRAIN_FILE_PREFIX + file_extension\n","TEST_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TEST_FILE_PREFIX + file_extension\n","\n","# File name for the prediction (image) dataset. The trained model will read\n","# this dataset and make predictions in each pixel.\n","IMAGE_FILE_PREFIX = 'Image_pixel_demo_'\n","\n","# The output path for the classified image (i.e. predictions) TFRecord file.\n","OUTPUT_IMAGE_FILE = 'gs://' + OUTPUT_BUCKET + '/Classified_pixel_demo.TFRecord'\n","# Export imagery in this region.\n","EXPORT_REGION = ee.Geometry.Rectangle([-122.7, 37.3, -121.8, 38.00])\n","# The name of the Earth Engine asset to be created by importing\n","# the classified image from the TFRecord file in Cloud Storage.\n","OUTPUT_ASSET_ID = 'users/' + USER_NAME + '/Classified_pixel_demo'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ZcjQnHH8zT4q","colab_type":"text"},"source":["# Get Training and Testing data from Earth Engine\n","\n","To get data for a classification model of three classes (bare, vegetation, water), we need labels and the value of predictor variables for each labeled example. We've already generated some labels in Earth Engine. 
Specifically, these are visually interpreted points labeled \"bare,\" \"vegetation,\" or \"water\" for a very simple classification demo ([example script](https://code.earthengine.google.com/?scriptPath=Examples%3ADemos%2FClassification)). For predictor variables, we'll use [Landsat 8 surface reflectance imagery](https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C01_T1_SR), bands 2-7."]},{"cell_type":"markdown","metadata":{"id":"0EJfjgelSOpN","colab_type":"text"},"source":["## Prepare Landsat 8 imagery\n","\n","First, make a cloud-masked median composite of Landsat 8 surface reflectance imagery from 2018. Check the composite by visualizing with folium."]},{"cell_type":"code","metadata":{"id":"DJYucYe3SPPr","colab_type":"code","colab":{}},"source":["# Cloud masking function.\n","def maskL8sr(image):\n"," cloudShadowBitMask = ee.Number(2).pow(3).int()\n"," cloudsBitMask = ee.Number(2).pow(5).int()\n"," qa = image.select('pixel_qa')\n"," mask = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(\n"," qa.bitwiseAnd(cloudsBitMask).eq(0))\n"," return image.updateMask(mask).select(BANDS).divide(10000)\n","\n","# The image input data is a 2018 cloud-masked median composite.\n","image = L8SR.filterDate('2018-01-01', '2018-12-31').map(maskL8sr).median()\n","\n","# Use folium to visualize the imagery.\n","mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n","map = folium.Map(location=[38., -122.5])\n","\n","folium.TileLayer(\n"," tiles=mapid['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='median composite',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"UEeyPf3zSPct","colab_type":"text"},"source":["## Add pixel values of the composite to labeled points\n","\n","Some training labels have already been collected for you. Load the labeled points from an existing Earth Engine asset. Each point in this table has a property called `landcover` that stores the label, encoded as an integer. Here we overlay the points on imagery to get predictor variables along with labels."]},{"cell_type":"code","metadata":{"id":"iOedOKyRExHE","colab_type":"code","colab":{}},"source":["# Sample the image at the points and add a random column.\n","sample = image.sampleRegions(\n"," collection=LABEL_DATA, properties=[LABEL], scale=30).randomColumn()\n","\n","# Partition the sample approximately 70-30.\n","training = sample.filter(ee.Filter.lt('random', 0.7))\n","testing = sample.filter(ee.Filter.gte('random', 0.7))\n","\n","from pprint import pprint\n","\n","# Print the first couple points to verify.\n","pprint({'training': training.first().getInfo()})\n","pprint({'testing': testing.first().getInfo()})"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uNc7a2nRR4MI","colab_type":"text"},"source":["## Export the training and testing data\n","\n","Now that there's training and testing data in Earth Engine and you've inspected a couple examples to ensure that the information you need is present, it's time to materialize the datasets in a place where the TensorFlow model has access to them. 
You can do that by exporting the training and testing datasets to tables in TFRecord format ([learn more about TFRecord format](https://www.tensorflow.org/tutorials/load_data/tf-records)) in your Cloud Storage bucket."]},{"cell_type":"code","metadata":{"id":"Pb-aPvQc0Xvp","colab_type":"code","colab":{}},"source":["# Make sure you can see the output bucket. You must have write access.\n","print('Found Cloud Storage bucket.' if tf.io.gfile.exists('gs://' + OUTPUT_BUCKET) \n"," else 'Can not find output Cloud Storage bucket.')"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Wtoqj0Db1TmJ","colab_type":"text"},"source":["Once you've verified the existence of the intended output bucket, run the exports."]},{"cell_type":"code","metadata":{"id":"TfVNQzg8R6Wy","colab_type":"code","colab":{}},"source":["# Create the tasks.\n","training_task = ee.batch.Export.table.toCloudStorage(\n"," collection=training,\n"," description='Training Export',\n"," fileNamePrefix=TRAIN_FILE_PREFIX,\n"," bucket=OUTPUT_BUCKET,\n"," fileFormat='TFRecord',\n"," selectors=FEATURE_NAMES)\n","\n","testing_task = ee.batch.Export.table.toCloudStorage(\n"," collection=testing,\n"," description='Testing Export',\n"," fileNamePrefix=TEST_FILE_PREFIX,\n"," bucket=OUTPUT_BUCKET,\n"," fileFormat='TFRecord',\n"," selectors=FEATURE_NAMES)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"QF4WGIekaS2s","colab_type":"code","colab":{}},"source":["# Start the tasks.\n","training_task.start()\n","testing_task.start()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"q7nFLuySISeC","colab_type":"text"},"source":["### Monitor task progress\n","\n","You can see all your Earth Engine tasks by listing them. Make sure the training and testing tasks are completed before continuing."]},{"cell_type":"code","metadata":{"id":"oEWvS5ekcEq0","colab_type":"code","colab":{}},"source":["# Print all tasks.\n","pprint(ee.batch.Task.list())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"43-c0JNFI_m6","colab_type":"text"},"source":["### Check existence of the exported files\n","\n","If you've seen the status of the export tasks change to `COMPLETED`, then check for the existence of the files in the output Cloud Storage bucket."]},{"cell_type":"code","metadata":{"id":"YDZfNl6yc0Kj","colab_type":"code","colab":{}},"source":["print('Found training file.' if tf.io.gfile.exists(TRAIN_FILE_PATH) \n"," else 'No training file found.')\n","print('Found testing file.' if tf.io.gfile.exists(TEST_FILE_PATH) \n"," else 'No testing file found.')"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"NA8QA8oQVo8V","colab_type":"text"},"source":["## Export the imagery\n","\n","You can also export imagery using TFRecord format. 
Specifically, export whatever imagery you want to be classified by the trained model into the output Cloud Storage bucket."]},{"cell_type":"code","metadata":{"id":"tVNhJYacVpEw","colab_type":"code","colab":{}},"source":["# Specify patch and file dimensions.\n","image_export_options = {\n"," 'patchDimensions': [256, 256],\n"," 'maxFileSize': 104857600,\n"," 'compressed': True\n","}\n","\n","# Setup the task.\n","image_task = ee.batch.Export.image.toCloudStorage(\n"," image=image,\n"," description='Image Export',\n"," fileNamePrefix=IMAGE_FILE_PREFIX,\n"," bucket=OUTPUT_BUCKET,\n"," scale=30,\n"," fileFormat='TFRecord',\n"," region=EXPORT_REGION.toGeoJSON()['coordinates'],\n"," formatOptions=image_export_options,\n",")"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"6SweCkHDaNE3","colab_type":"code","colab":{}},"source":["# Start the task.\n","image_task.start()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"JC8C53MRTG_E","colab_type":"text"},"source":["### Monitor task progress"]},{"cell_type":"code","metadata":{"id":"BmPHb779KOXm","colab_type":"code","colab":{}},"source":["# Print all tasks.\n","pprint(ee.batch.Task.list())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"SrUhA1JKLONj","colab_type":"text"},"source":["It's also possible to monitor an individual task. Here we poll the task until it's done. If you do this, please put a `sleep()` in the loop to avoid making too many requests. Note that this will block until complete (you can always halt the execution of this cell)."]},{"cell_type":"code","metadata":{"id":"rKZeZswloP11","colab_type":"code","colab":{}},"source":["import time\n","\n","while image_task.active():\n"," print('Polling for task (id: {}).'.format(image_task.id))\n"," time.sleep(30)\n","print('Done with image export.')"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"9vWdH_wlZCEk","colab_type":"text"},"source":["# Data preparation and pre-processing\n","\n","Read data from the TFRecord file into a `tf.data.Dataset`. Pre-process the dataset to get it into a suitable format for input to the model."]},{"cell_type":"markdown","metadata":{"id":"LS4jGTrEfz-1","colab_type":"text"},"source":["## Read into a `tf.data.Dataset`\n","\n","Here we are going to read a file in Cloud Storage into a `tf.data.Dataset`. ([these TensorFlow docs](https://www.tensorflow.org/guide/data) explain more about reading data into a `Dataset`). Check that you can read examples from the file. The purpose here is to ensure that we can read from the file without an error. The actual content is not necessarily human readable.\n","\n"]},{"cell_type":"code","metadata":{"id":"T3PKyDQW8Vpx","colab_type":"code","cellView":"code","colab":{}},"source":["# Create a dataset from the TFRecord file in Cloud Storage.\n","train_dataset = tf.data.TFRecordDataset(TRAIN_FILE_PATH, compression_type='GZIP')\n","# Print the first record to check.\n","print(iter(train_dataset).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"BrDYm-ibKR6t","colab_type":"text"},"source":["## Define the structure of your data\n","\n","For parsing the exported TFRecord files, `featuresDict` is a mapping between feature names (recall that `featureNames` contains the band and label names) and `float32` [`tf.io.FixedLenFeature`](https://www.tensorflow.org/api_docs/python/tf/io/FixedLenFeature) objects. This mapping is necessary for telling TensorFlow how to read data in a TFRecord file into tensors. 
Specifically, **all numeric data exported from Earth Engine is exported as `float32`**.\n","\n","(Note: *features* in the TensorFlow context (i.e. [`tf.train.Feature`](https://www.tensorflow.org/api_docs/python/tf/train/Feature)) are not to be confused with Earth Engine features (i.e. [`ee.Feature`](https://developers.google.com/earth-engine/api_docs#eefeature)), where the former is a protocol message type for serialized data input to the model and the latter is a geometry-based geographic data structure.)"]},{"cell_type":"code","metadata":{"id":"-6JVQV5HKHMZ","colab_type":"code","cellView":"code","colab":{}},"source":["# List of fixed-length features, all of which are float32.\n","columns = [\n"," tf.io.FixedLenFeature(shape=[1], dtype=tf.float32) for k in FEATURE_NAMES\n","]\n","\n","# Dictionary with names as keys, features as values.\n","features_dict = dict(zip(FEATURE_NAMES, columns))\n","\n","pprint(features_dict)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"QNfaUPbcjuCO","colab_type":"text"},"source":["## Parse the dataset\n","\n","Now we need to make a parsing function for the data in the TFRecord files. The data comes in flattened 2D arrays per record and we want to use the first part of the array for input to the model and the last element of the array as the class label. The parsing function reads data from a serialized [`Example` proto](https://www.tensorflow.org/api_docs/python/tf/train/Example) into a dictionary in which the keys are the feature names and the values are the tensors storing the value of the features for that example. ([These TensorFlow docs](https://www.tensorflow.org/tutorials/load_data/tfrecord) explain more about reading `Example` protos from TFRecord files)."]},{"cell_type":"code","metadata":{"id":"x2Q0g3fBj2kD","colab_type":"code","cellView":"code","colab":{}},"source":["def parse_tfrecord(example_proto):\n"," \"\"\"The parsing function.\n","\n"," Read a serialized example into the structure defined by featuresDict.\n","\n"," Args:\n"," example_proto: a serialized Example.\n","\n"," Returns:\n"," A tuple of the predictors dictionary and the label, cast to an `int32`.\n"," \"\"\"\n"," parsed_features = tf.io.parse_single_example(example_proto, features_dict)\n"," labels = parsed_features.pop(LABEL)\n"," return parsed_features, tf.cast(labels, tf.int32)\n","\n","# Map the function over the dataset.\n","parsed_dataset = train_dataset.map(parse_tfrecord, num_parallel_calls=5)\n","\n","# Print the first parsed record to check.\n","pprint(iter(parsed_dataset).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Nb8EyNT4Xnhb","colab_type":"text"},"source":["Note that each record of the parsed dataset contains a tuple. The first element of the tuple is a dictionary with bands for keys and the numeric value of the bands for values. The second element of the tuple is a class label."]},{"cell_type":"markdown","metadata":{"id":"xLCsxWOuEBmE","colab_type":"text"},"source":["## Create additional features\n","\n","Another thing we might want to do as part of the input process is to create new features, for example NDVI, a vegetation index computed from reflectance in two spectral bands. Here are some helper functions for that."]},{"cell_type":"code","metadata":{"id":"lT6v2RM_EB1E","colab_type":"code","cellView":"code","colab":{}},"source":["def normalized_difference(a, b):\n"," \"\"\"Compute normalized difference of two inputs.\n","\n"," Compute (a - b) / (a + b). 
If the denomenator is zero, add a small delta.\n","\n"," Args:\n"," a: an input tensor with shape=[1]\n"," b: an input tensor with shape=[1]\n","\n"," Returns:\n"," The normalized difference as a tensor.\n"," \"\"\"\n"," nd = (a - b) / (a + b)\n"," nd_inf = (a - b) / (a + b + 0.000001)\n"," return tf.where(tf.math.is_finite(nd), nd, nd_inf)\n","\n","def add_NDVI(features, label):\n"," \"\"\"Add NDVI to the dataset.\n"," Args:\n"," features: a dictionary of input tensors keyed by feature name.\n"," label: the target label\n","\n"," Returns:\n"," A tuple of the input dictionary with an NDVI tensor added and the label.\n"," \"\"\"\n"," features['NDVI'] = normalized_difference(features['B5'], features['B4'])\n"," return features, label"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nEx1RAXOZQkS","colab_type":"text"},"source":["# Model setup\n","\n","The basic workflow for classification in TensorFlow is:\n","\n","1. Create the model.\n","2. Train the model (i.e. `fit()`).\n","3. Use the trained model for inference (i.e. `predict()`).\n","\n","Here we'll create a `Sequential` neural network model using Keras. This simple model is inspired by examples in:\n","\n","* [The TensorFlow Get Started tutorial](https://www.tensorflow.org/tutorials/)\n","* [The TensorFlow Keras guide](https://www.tensorflow.org/guide/keras#build_a_simple_model)\n","* [The Keras `Sequential` model examples](https://keras.io/getting-started/sequential-model-guide/#multilayer-perceptron-mlp-for-multi-class-softmax-classification)\n","\n","Note that the model used here is purely for demonstration purposes and hasn't gone through any performance tuning."]},{"cell_type":"markdown","metadata":{"id":"t9pWa54oG-xl","colab_type":"text"},"source":["## Create the Keras model\n","\n","Before we create the model, there's still a wee bit of pre-processing to get the data into the right input shape and a format that can be used with cross-entropy loss. Specifically, Keras expects a list of inputs and a one-hot vector for the class. (See [the Keras loss function docs](https://keras.io/losses/), [the TensorFlow categorical identity docs](https://www.tensorflow.org/guide/feature_columns#categorical_identity_column) and [the `tf.one_hot` docs](https://www.tensorflow.org/api_docs/python/tf/one_hot) for details). \n","\n","Here we will use a simple neural network model with a 64 node hidden layer, a dropout layer and an output layer. Once the dataset has been prepared, define the model, compile it, fit it to the training data. See [the Keras `Sequential` model guide](https://keras.io/getting-started/sequential-model-guide/) for more details."]},{"cell_type":"code","metadata":{"id":"OCZq3VNpG--G","colab_type":"code","cellView":"code","colab":{}},"source":["from tensorflow import keras\n","\n","# Add NDVI.\n","input_dataset = parsed_dataset.map(add_NDVI)\n","\n","# Keras requires inputs as a tuple. Note that the inputs must be in the\n","# right shape. 
Also note that to use the categorical_crossentropy loss,\n","# the label needs to be turned into a one-hot vector.\n","def to_tuple(inputs, label):\n"," return (tf.transpose(list(inputs.values())),\n"," tf.one_hot(indices=label, depth=N_CLASSES))\n","\n","# Map the to_tuple function, shuffle and batch.\n","input_dataset = input_dataset.map(to_tuple).batch(8)\n","\n","# Define the layers in the model.\n","model = tf.keras.models.Sequential([\n"," tf.keras.layers.Dense(64, activation=tf.nn.relu),\n"," tf.keras.layers.Dropout(0.2),\n"," tf.keras.layers.Dense(N_CLASSES, activation=tf.nn.softmax)\n","])\n","\n","# Compile the model with the specified loss function.\n","model.compile(optimizer=tf.keras.optimizers.Adam(),\n"," loss='categorical_crossentropy',\n"," metrics=['accuracy'])\n","\n","# Fit the model to the training data.\n","model.fit(x=input_dataset, epochs=10)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Pa4ex_4eKiyb","colab_type":"text"},"source":["## Check model accuracy on the test set\n","\n","Now that we have a trained model, we can evaluate it using the test dataset. To do that, read and prepare the test dataset in the same way as the training dataset. Here we specify a batch size of 1 so that each example in the test set is used exactly once to compute model accuracy. For model steps, just specify a number larger than the test dataset size (ignore the warning)."]},{"cell_type":"code","metadata":{"id":"tE6d7FsrMa1p","colab_type":"code","cellView":"code","colab":{}},"source":["test_dataset = (\n"," tf.data.TFRecordDataset(TEST_FILE_PATH, compression_type='GZIP')\n"," .map(parse_tfrecord, num_parallel_calls=5)\n"," .map(add_NDVI)\n"," .map(to_tuple)\n"," .batch(1))\n","\n","model.evaluate(test_dataset)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nhHrnv3VR0DU","colab_type":"text"},"source":["# Use the trained model to classify an image from Earth Engine\n","\n","Now it's time to classify the image that was exported from Earth Engine. If the exported image is large, it will be split into multiple TFRecord files in its destination folder. There will also be a JSON sidecar file called \"the mixer\" that describes the format and georeferencing of the image. Here we will find the image files and the mixer file, getting some info out of the mixer that will be useful during model inference."]},{"cell_type":"markdown","metadata":{"id":"nmTayDitZgQ5","colab_type":"text"},"source":["## Find the image files and JSON mixer file in Cloud Storage\n","\n","Use `gsutil` to locate the files of interest in the output Cloud Storage bucket. 
Check to make sure your image export task finished before running the following."]},{"cell_type":"code","metadata":{"id":"oUv9WMpcVp8E","colab_type":"code","colab":{}},"source":["# Get a list of all the files in the output bucket.\n","files_list = !gsutil ls 'gs://'{OUTPUT_BUCKET}\n","# Get only the files generated by the image export.\n","exported_files_list = [s for s in files_list if IMAGE_FILE_PREFIX in s]\n","\n","# Get the list of image files and the JSON mixer file.\n","image_files_list = []\n","json_file = None\n","for f in exported_files_list:\n"," if f.endswith('.tfrecord.gz'):\n"," image_files_list.append(f)\n"," elif f.endswith('.json'):\n"," json_file = f\n","\n","# Make sure the files are in the right order.\n","image_files_list.sort()\n","\n","pprint(image_files_list)\n","print(json_file)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"RcjYG9fk53xL","colab_type":"text"},"source":["## Read the JSON mixer file\n","\n","The mixer contains metadata and georeferencing information for the exported patches, each of which is in a different file. Read the mixer to get some information needed for prediction."]},{"cell_type":"code","metadata":{"id":"Gn7Dr0AAd93_","colab_type":"code","colab":{}},"source":["import json\n","\n","# Load the contents of the mixer file to a JSON object.\n","json_text = !gsutil cat {json_file}\n","# Get a single string w/ newlines from the IPython.utils.text.SList\n","mixer = json.loads(json_text.nlstr)\n","pprint(mixer)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"6xyzyPPJwpVI","colab_type":"text"},"source":["## Read the image files into a dataset\n","\n","You can feed the list of files (`imageFilesList`) directly to the `TFRecordDataset` constructor to make a combined dataset on which to perform inference. The input needs to be preprocessed differently than the training and testing. 
Mainly, this is because the pixels are written into records as patches, we need to read the patches in as one big tensor (one patch for each band), then flatten them into lots of little tensors."]},{"cell_type":"code","metadata":{"id":"tn8Kj3VfwpiJ","colab_type":"code","cellView":"code","colab":{}},"source":["# Get relevant info from the JSON mixer file.\n","patch_width = mixer['patchDimensions'][0]\n","patch_height = mixer['patchDimensions'][1]\n","patches = mixer['totalPatches']\n","patch_dimensions_flat = [patch_width * patch_height, 1]\n","\n","# Note that the tensors are in the shape of a patch, one patch for each band.\n","image_columns = [\n"," tf.io.FixedLenFeature(shape=patch_dimensions_flat, dtype=tf.float32) \n"," for k in BANDS\n","]\n","\n","# Parsing dictionary.\n","image_features_dict = dict(zip(BANDS, image_columns))\n","\n","# Note that you can make one dataset from many files by specifying a list.\n","image_dataset = tf.data.TFRecordDataset(image_files_list, compression_type='GZIP')\n","\n","# Parsing function.\n","def parse_image(example_proto):\n"," return tf.io.parse_single_example(example_proto, image_features_dict)\n","\n","# Parse the data into tensors, one long tensor per patch.\n","image_dataset = image_dataset.map(parse_image, num_parallel_calls=5)\n","\n","# Break our long tensors into many little ones.\n","image_dataset = image_dataset.flat_map(\n"," lambda features: tf.data.Dataset.from_tensor_slices(features)\n",")\n","\n","# Add additional features (NDVI).\n","image_dataset = image_dataset.map(\n"," # Add NDVI to a feature that doesn't have a label.\n"," lambda features: add_NDVI(features, None)[0]\n",")\n","\n","# Turn the dictionary in each record into a tuple without a label.\n","image_dataset = image_dataset.map(\n"," lambda data_dict: (tf.transpose(list(data_dict.values())), )\n",")\n","\n","# Turn each patch into a batch.\n","image_dataset = image_dataset.batch(patch_width * patch_height)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"_2sfRemRRDkV","colab_type":"text"},"source":["## Generate predictions for the image pixels\n","\n","To get predictions in each pixel, run the image dataset through the trained model using `model.predict()`. Print the first prediction to see that the output is a list of the three class probabilities for each pixel. Running all predictions might take a while."]},{"cell_type":"code","metadata":{"id":"8VGhmiP_REBP","colab_type":"code","colab":{}},"source":["# Run prediction in batches, with as many steps as there are patches.\n","predictions = model.predict(image_dataset, steps=patches, verbose=1)\n","\n","# Note that the predictions come as a numpy array. Check the first one.\n","print(predictions[0])"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"bPU2VlPOikAy","colab_type":"text"},"source":["## Write the predictions to a TFRecord file\n","\n","Now that there's a list of class probabilities in `predictions`, it's time to write them back into a file, optionally including a class label which is simply the index of the maximum probability. We'll write directly from TensorFlow to a file in the output Cloud Storage bucket.\n","\n","Iterate over the list, compute class label and write the class and the probabilities in patches. Specifically, we need to write the pixels into the file as patches in the same order they came out. The records are written as serialized `tf.train.Example` protos. 
This might take a while."]},{"cell_type":"code","metadata":{"id":"AkorbsEHepzJ","colab_type":"code","colab":{}},"source":["print('Writing to file ' + OUTPUT_IMAGE_FILE)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"kATMknHc0qeR","colab_type":"code","cellView":"code","colab":{}},"source":["# Instantiate the writer.\n","writer = tf.io.TFRecordWriter(OUTPUT_IMAGE_FILE)\n","\n","# Every patch-worth of predictions we'll dump an example into the output\n","# file with a single feature that holds our predictions. Since our predictions\n","# are already in the order of the exported data, the patches we create here\n","# will also be in the right order.\n","patch = [[], [], [], []]\n","cur_patch = 1\n","for prediction in predictions:\n"," patch[0].append(tf.argmax(prediction, 1))\n"," patch[1].append(prediction[0][0])\n"," patch[2].append(prediction[0][1])\n"," patch[3].append(prediction[0][2])\n"," # Once we've seen a patches-worth of class_ids...\n"," if (len(patch[0]) == patch_width * patch_height):\n"," print('Done with patch ' + str(cur_patch) + ' of ' + str(patches) + '...')\n"," # Create an example\n"," example = tf.train.Example(\n"," features=tf.train.Features(\n"," feature={\n"," 'prediction': tf.train.Feature(\n"," int64_list=tf.train.Int64List(\n"," value=patch[0])),\n"," 'bareProb': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=patch[1])),\n"," 'vegProb': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=patch[2])),\n"," 'waterProb': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=patch[3])),\n"," }\n"," )\n"," )\n"," # Write the example to the file and clear our patch array so it's ready for\n"," # another batch of class ids\n"," writer.write(example.SerializeToString())\n"," patch = [[], [], [], []]\n"," cur_patch += 1\n","\n","writer.close()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"1K_1hKs0aBdA","colab_type":"text"},"source":["# Upload the classifications to an Earth Engine asset"]},{"cell_type":"markdown","metadata":{"id":"M6sNZXWOSa82","colab_type":"text"},"source":["## Verify the existence of the predictions file\n","\n","At this stage, there should be a predictions TFRecord file sitting in the output Cloud Storage bucket. Use the `gsutil` command to verify that the predictions image (and associated mixer JSON) exist and have non-zero size."]},{"cell_type":"code","metadata":{"id":"6ZVWDPefUCgA","colab_type":"code","colab":{}},"source":["!gsutil ls -l {OUTPUT_IMAGE_FILE}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"2ZyCo297Clcx","colab_type":"text"},"source":["## Upload the classified image to Earth Engine\n","\n","Upload the image to Earth Engine directly from the Cloud Storage bucket with the [`earthengine` command](https://developers.google.com/earth-engine/command_line#upload). 
Provide both the image TFRecord file and the JSON file as arguments to `earthengine upload`."]},{"cell_type":"code","metadata":{"id":"NXulMNl9lTDv","colab_type":"code","cellView":"code","colab":{}},"source":["print('Uploading to ' + OUTPUT_ASSET_ID)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"V64tcVxsO5h6","colab_type":"code","colab":{}},"source":["# Start the upload.\n","!earthengine upload image --asset_id={OUTPUT_ASSET_ID} --pyramiding_policy=mode {OUTPUT_IMAGE_FILE} {json_file}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Yt4HyhUU_Bal","colab_type":"text"},"source":["## Check the status of the asset ingestion\n","\n","You can also use the Earth Engine API to check the status of your asset upload. It might take a while. The upload of the image is an asset ingestion task."]},{"cell_type":"code","metadata":{"id":"_vB-gwGhl_3C","colab_type":"code","cellView":"code","colab":{}},"source":["ee.batch.Task.list()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"vvXvy9GDhM-p","colab_type":"text"},"source":["## View the ingested asset\n","\n","Display the vector of class probabilities as an RGB image with colors corresponding to the probability of bare, vegetation, water in a pixel. Also display the winning class using the same color palette."]},{"cell_type":"code","metadata":{"id":"kEkVxIyJiFd4","colab_type":"code","colab":{}},"source":["predictions_image = ee.Image(OUTPUT_ASSET_ID)\n","\n","prediction_vis = {\n"," 'bands': 'prediction',\n"," 'min': 0,\n"," 'max': 2,\n"," 'palette': ['red', 'green', 'blue']\n","}\n","probability_vis = {'bands': ['bareProb', 'vegProb', 'waterProb'], 'max': 0.5}\n","\n","prediction_map_id = predictions_image.getMapId(prediction_vis)\n","probability_map_id = predictions_image.getMapId(probability_vis)\n","\n","map = folium.Map(location=[37.6413, -122.2582])\n","folium.TileLayer(\n"," tiles=prediction_map_id['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='prediction',\n",").add_to(map)\n","folium.TileLayer(\n"," tiles=probability_map_id['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='probability',\n",").add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]}]} \ No newline at end of file +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "TF_demo1_keras.ipynb", + "provenance": [], + "private_outputs": true, + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "fSIfBsgi8dNK", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#@title Copyright 2020 Google LLC. { display-mode: \"form\" }\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aV1xZ1CPi3Nw", + "colab_type": "text" + }, + "source": [ + "\u003ctable class=\"ee-notebook-buttons\" align=\"left\"\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/TF_demo1_keras.ipynb\"\u003e\n", + " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e Run in Google Colab\u003c/a\u003e\n", + "\u003c/td\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"https://github.com/google/earthengine-community/blob/master/guides/linked/TF_demo1_keras.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e View source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AC8adBmw-5m3", + "colab_type": "text" + }, + "source": [ + "# Introduction\n", + "\n", + "This is an Earth Engine \u003c\u003e TensorFlow demonstration notebook. Specifically, this notebook shows:\n", + "\n", + "1. Exporting training/testing data from Earth Engine in TFRecord format.\n", + "2. Preparing the data for use in a TensorFlow model.\n", + "2. Training and validating a simple model (Keras `Sequential` neural network) in TensorFlow.\n", + "3. Making predictions on image data exported from Earth Engine in TFRecord format.\n", + "4. Ingesting classified image data to Earth Engine in TFRecord format.\n", + "\n", + "This is intended to demonstrate a complete i/o pipeline. For a workflow that uses a [Google AI Platform](https://cloud.google.com/ai-platform) hosted model making predictions interactively, see [this example notebook](http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_AI_Platform.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KiTyR3FNlv-O", + "colab_type": "text" + }, + "source": [ + "# Setup software libraries\n", + "\n", + "Import software libraries and/or authenticate as necessary." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dEM3FP4YakJg", + "colab_type": "text" + }, + "source": [ + "## Authenticate to Colab and Cloud\n", + "\n", + "To read/write from a Google Cloud Storage bucket to which you have access, it's necessary to authenticate (as yourself). *This should be the same account you use to login to Earth Engine*. When you run the code below, it will display a link in the output to an authentication page in your browser. Follow the link to a page that will let you grant permission to the Cloud SDK to access your resources. Copy the code from the permissions page back into this notebook and press return to complete the process.\n", + "\n", + "(You may need to run this again if you get a credentials error later.)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "sYyTIPLsvMWl", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "from google.colab import auth\n", + "auth.authenticate_user()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ejxa1MQjEGv9", + "colab_type": "text" + }, + "source": [ + "## Authenticate to Earth Engine\n", + "\n", + "Authenticate to Earth Engine the same way you did to the Colab notebook. Specifically, run the code to display a link to a permissions page. This gives you access to your Earth Engine account. 
*This should be the same account you used to login to Cloud previously*. Copy the code from the Earth Engine permissions page back into the notebook and press return to complete the process." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HzwiVqbcmJIX", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "import ee\n", + "ee.Authenticate()\n", + "ee.Initialize(project='my-project')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iJ70EsoWND_0", + "colab_type": "text" + }, + "source": [ + "## Test the TensorFlow installation\n", + "\n", + "Import the TensorFlow library and check the version." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "i1PrYRLaVw_g", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b8Xcvjp6cLOL", + "colab_type": "text" + }, + "source": [ + "## Test the Folium installation\n", + "\n", + "We will use the Folium library for visualization. Import the library and check the version." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YiVgOXzBZJSn", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import folium\n", + "print(folium.__version__)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DrXLkJC2QJdP", + "colab_type": "text" + }, + "source": [ + "# Define variables\n", + "\n", + "This set of global variables will be used throughout. For this demo, you must have a Cloud Storage bucket into which you can write files. ([learn more about creating Cloud Storage buckets](https://cloud.google.com/storage/docs/creating-buckets)). You'll also need to specify your Earth Engine username, i.e. `users/USER_NAME` on the [Code Editor](https://code.earthengine.google.com/) Assets tab." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GHTOc5YLQZ5B", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Your Earth Engine username. This is used to import a classified image\n", + "# into your Earth Engine assets folder.\n", + "USER_NAME = 'username'\n", + "\n", + "# Cloud Storage bucket into which training, testing and prediction \n", + "# datasets will be written. You must be able to write into this bucket.\n", + "OUTPUT_BUCKET = 'your-bucket'\n", + "\n", + "# Use Landsat 8 surface reflectance data for predictors.\n", + "L8SR = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\n", + "# Use these bands for prediction.\n", + "BANDS = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']\n", + "\n", + "# This is a training/testing dataset of points with known land cover labels.\n", + "LABEL_DATA = ee.FeatureCollection('projects/google/demo_landcover_labels')\n", + "# The labels, consecutive integer indices starting from zero, are stored in\n", + "# this property, set on each point.\n", + "LABEL = 'landcover'\n", + "# Number of label values, i.e. number of classes in the classification.\n", + "N_CLASSES = 3\n", + "\n", + "# These names are used to specify properties in the export of\n", + "# training/testing data and to define the mapping between names and data\n", + "# when reading into TensorFlow datasets.\n", + "FEATURE_NAMES = list(BANDS)\n", + "FEATURE_NAMES.append(LABEL)\n", + "\n", + "# File names for the training and testing datasets. 
These TFRecord files\n", + "# will be exported from Earth Engine into the Cloud Storage bucket.\n", + "TRAIN_FILE_PREFIX = 'Training_demo'\n", + "TEST_FILE_PREFIX = 'Testing_demo'\n", + "file_extension = '.tfrecord.gz'\n", + "TRAIN_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TRAIN_FILE_PREFIX + file_extension\n", + "TEST_FILE_PATH = 'gs://' + OUTPUT_BUCKET + '/' + TEST_FILE_PREFIX + file_extension\n", + "\n", + "# File name for the prediction (image) dataset. The trained model will read\n", + "# this dataset and make predictions in each pixel.\n", + "IMAGE_FILE_PREFIX = 'Image_pixel_demo_'\n", + "\n", + "# The output path for the classified image (i.e. predictions) TFRecord file.\n", + "OUTPUT_IMAGE_FILE = 'gs://' + OUTPUT_BUCKET + '/Classified_pixel_demo.TFRecord'\n", + "# Export imagery in this region.\n", + "EXPORT_REGION = ee.Geometry.Rectangle([-122.7, 37.3, -121.8, 38.00])\n", + "# The name of the Earth Engine asset to be created by importing\n", + "# the classified image from the TFRecord file in Cloud Storage.\n", + "OUTPUT_ASSET_ID = 'users/' + USER_NAME + '/Classified_pixel_demo'" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZcjQnHH8zT4q", + "colab_type": "text" + }, + "source": [ + "# Get Training and Testing data from Earth Engine\n", + "\n", + "To get data for a classification model of three classes (bare, vegetation, water), we need labels and the value of predictor variables for each labeled example. We've already generated some labels in Earth Engine. Specifically, these are visually interpreted points labeled \"bare,\" \"vegetation,\" or \"water\" for a very simple classification demo ([example script](https://code.earthengine.google.com/?scriptPath=Examples%3ADemos%2FClassification)). For predictor variables, we'll use [Landsat 8 surface reflectance imagery](https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C01_T1_SR), bands 2-7." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0EJfjgelSOpN", + "colab_type": "text" + }, + "source": [ + "## Prepare Landsat 8 imagery\n", + "\n", + "First, make a cloud-masked median composite of Landsat 8 surface reflectance imagery from 2018. Check the composite by visualizing with folium." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DJYucYe3SPPr", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Cloud masking function.\n", + "def maskL8sr(image):\n", + " cloudShadowBitMask = ee.Number(2).pow(3).int()\n", + " cloudsBitMask = ee.Number(2).pow(5).int()\n", + " qa = image.select('pixel_qa')\n", + " mask = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(\n", + " qa.bitwiseAnd(cloudsBitMask).eq(0))\n", + " return image.updateMask(mask).select(BANDS).divide(10000)\n", + "\n", + "# The image input data is a 2018 cloud-masked median composite.\n", + "image = L8SR.filterDate('2018-01-01', '2018-12-31').map(maskL8sr).median()\n", + "\n", + "# Use folium to visualize the imagery.\n", + "mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n", + "map = folium.Map(location=[38., -122.5])\n", + "\n", + "folium.TileLayer(\n", + " tiles=mapid['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='median composite',\n", + " ).add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UEeyPf3zSPct", + "colab_type": "text" + }, + "source": [ + "## Add pixel values of the composite to labeled points\n", + "\n", + "Some training labels have already been collected for you. Load the labeled points from an existing Earth Engine asset. Each point in this table has a property called `landcover` that stores the label, encoded as an integer. Here we overlay the points on imagery to get predictor variables along with labels." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iOedOKyRExHE", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Sample the image at the points and add a random column.\n", + "sample = image.sampleRegions(\n", + " collection=LABEL_DATA, properties=[LABEL], scale=30).randomColumn()\n", + "\n", + "# Partition the sample approximately 70-30.\n", + "training = sample.filter(ee.Filter.lt('random', 0.7))\n", + "testing = sample.filter(ee.Filter.gte('random', 0.7))\n", + "\n", + "from pprint import pprint\n", + "\n", + "# Print the first couple points to verify.\n", + "pprint({'training': training.first().getInfo()})\n", + "pprint({'testing': testing.first().getInfo()})" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uNc7a2nRR4MI", + "colab_type": "text" + }, + "source": [ + "## Export the training and testing data\n", + "\n", + "Now that there's training and testing data in Earth Engine and you've inspected a couple examples to ensure that the information you need is present, it's time to materialize the datasets in a place where the TensorFlow model has access to them. You can do that by exporting the training and testing datasets to tables in TFRecord format ([learn more about TFRecord format](https://www.tensorflow.org/tutorials/load_data/tf-records)) in your Cloud Storage bucket." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Pb-aPvQc0Xvp", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Make sure you can see the output bucket. You must have write access.\n", + "print('Found Cloud Storage bucket.' 
if tf.io.gfile.exists('gs://' + OUTPUT_BUCKET) \n", + " else 'Can not find output Cloud Storage bucket.')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wtoqj0Db1TmJ", + "colab_type": "text" + }, + "source": [ + "Once you've verified the existence of the intended output bucket, run the exports." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TfVNQzg8R6Wy", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Create the tasks.\n", + "training_task = ee.batch.Export.table.toCloudStorage(\n", + " collection=training,\n", + " description='Training Export',\n", + " fileNamePrefix=TRAIN_FILE_PREFIX,\n", + " bucket=OUTPUT_BUCKET,\n", + " fileFormat='TFRecord',\n", + " selectors=FEATURE_NAMES)\n", + "\n", + "testing_task = ee.batch.Export.table.toCloudStorage(\n", + " collection=testing,\n", + " description='Testing Export',\n", + " fileNamePrefix=TEST_FILE_PREFIX,\n", + " bucket=OUTPUT_BUCKET,\n", + " fileFormat='TFRecord',\n", + " selectors=FEATURE_NAMES)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "QF4WGIekaS2s", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Start the tasks.\n", + "training_task.start()\n", + "testing_task.start()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q7nFLuySISeC", + "colab_type": "text" + }, + "source": [ + "### Monitor task progress\n", + "\n", + "You can see all your Earth Engine tasks by listing them. Make sure the training and testing tasks are completed before continuing." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oEWvS5ekcEq0", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Print all tasks.\n", + "pprint(ee.batch.Task.list())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "43-c0JNFI_m6", + "colab_type": "text" + }, + "source": [ + "### Check existence of the exported files\n", + "\n", + "If you've seen the status of the export tasks change to `COMPLETED`, then check for the existence of the files in the output Cloud Storage bucket." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YDZfNl6yc0Kj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "print('Found training file.' if tf.io.gfile.exists(TRAIN_FILE_PATH) \n", + " else 'No training file found.')\n", + "print('Found testing file.' if tf.io.gfile.exists(TEST_FILE_PATH) \n", + " else 'No testing file found.')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NA8QA8oQVo8V", + "colab_type": "text" + }, + "source": [ + "## Export the imagery\n", + "\n", + "You can also export imagery using TFRecord format. Specifically, export whatever imagery you want to be classified by the trained model into the output Cloud Storage bucket." 
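+        ,
+        "\n",
+        "\n",
+        "The export below uses 256x256 pixel patches at a 30 meter scale. As a rough, back-of-the-envelope check of what one patch covers on the ground (illustrative only):\n",
+        "\n",
+        "```python\n",
+        "# Approximate ground footprint of one exported patch.\n",
+        "patch_pixels = 256   # patchDimensions in the export options below\n",
+        "scale_m = 30         # export scale in meters\n",
+        "print('One patch spans about %.2f km per side.' % (patch_pixels * scale_m / 1000))\n",
+        "```"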
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tVNhJYacVpEw", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Specify patch and file dimensions.\n", + "image_export_options = {\n", + " 'patchDimensions': [256, 256],\n", + " 'maxFileSize': 104857600,\n", + " 'compressed': True\n", + "}\n", + "\n", + "# Setup the task.\n", + "image_task = ee.batch.Export.image.toCloudStorage(\n", + " image=image,\n", + " description='Image Export',\n", + " fileNamePrefix=IMAGE_FILE_PREFIX,\n", + " bucket=OUTPUT_BUCKET,\n", + " scale=30,\n", + " fileFormat='TFRecord',\n", + " region=EXPORT_REGION.toGeoJSON()['coordinates'],\n", + " formatOptions=image_export_options,\n", + ")" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "6SweCkHDaNE3", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Start the task.\n", + "image_task.start()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JC8C53MRTG_E", + "colab_type": "text" + }, + "source": [ + "### Monitor task progress" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BmPHb779KOXm", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Print all tasks.\n", + "pprint(ee.batch.Task.list())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SrUhA1JKLONj", + "colab_type": "text" + }, + "source": [ + "It's also possible to monitor an individual task. Here we poll the task until it's done. If you do this, please put a `sleep()` in the loop to avoid making too many requests. Note that this will block until complete (you can always halt the execution of this cell)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rKZeZswloP11", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import time\n", + "\n", + "while image_task.active():\n", + " print('Polling for task (id: {}).'.format(image_task.id))\n", + " time.sleep(30)\n", + "print('Done with image export.')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9vWdH_wlZCEk", + "colab_type": "text" + }, + "source": [ + "# Data preparation and pre-processing\n", + "\n", + "Read data from the TFRecord file into a `tf.data.Dataset`. Pre-process the dataset to get it into a suitable format for input to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LS4jGTrEfz-1", + "colab_type": "text" + }, + "source": [ + "## Read into a `tf.data.Dataset`\n", + "\n", + "Here we are going to read a file in Cloud Storage into a `tf.data.Dataset`. ([these TensorFlow docs](https://www.tensorflow.org/guide/data) explain more about reading data into a `Dataset`). Check that you can read examples from the file. The purpose here is to ensure that we can read from the file without an error. 
The actual content is not necessarily human readable.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "T3PKyDQW8Vpx",
+        "colab_type": "code",
+        "cellView": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Create a dataset from the TFRecord file in Cloud Storage.\n",
+        "train_dataset = tf.data.TFRecordDataset(TRAIN_FILE_PATH, compression_type='GZIP')\n",
+        "# Print the first record to check.\n",
+        "print(iter(train_dataset).next())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "BrDYm-ibKR6t",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Define the structure of your data\n",
+        "\n",
+        "For parsing the exported TFRecord files, `features_dict` is a mapping between feature names (recall that `FEATURE_NAMES` contains the band and label names) and `float32` [`tf.io.FixedLenFeature`](https://www.tensorflow.org/api_docs/python/tf/io/FixedLenFeature) objects. This mapping tells TensorFlow how to read the data in a TFRecord file into tensors. Specifically, **all numeric data exported from Earth Engine is exported as `float32`**.\n",
+        "\n",
+        "(Note: *features* in the TensorFlow context (i.e. [`tf.train.Feature`](https://www.tensorflow.org/api_docs/python/tf/train/Feature)) are not to be confused with Earth Engine features (i.e. [`ee.Feature`](https://developers.google.com/earth-engine/api_docs#eefeature)): the former is a protocol message type for serialized data input to the model and the latter is a geometry-based geographic data structure.)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-6JVQV5HKHMZ",
+        "colab_type": "code",
+        "cellView": "code",
+        "colab": {}
+      },
+      "source": [
+        "# List of fixed-length features, all of which are float32.\n",
+        "columns = [\n",
+        "  tf.io.FixedLenFeature(shape=[1], dtype=tf.float32) for k in FEATURE_NAMES\n",
+        "]\n",
+        "\n",
+        "# Dictionary with names as keys, features as values.\n",
+        "features_dict = dict(zip(FEATURE_NAMES, columns))\n",
+        "\n",
+        "pprint(features_dict)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "QNfaUPbcjuCO",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Parse the dataset\n",
+        "\n",
+        "Now we need a parsing function for the data in the TFRecord files. The data come in flattened arrays per record; we use the first part of the array as input to the model and the last element as the class label. The parsing function reads data from a serialized [`Example` proto](https://www.tensorflow.org/api_docs/python/tf/train/Example) into a dictionary in which the keys are the feature names and the values are the tensors storing the value of the features for that example. ([These TensorFlow docs](https://www.tensorflow.org/tutorials/load_data/tfrecord) explain more about reading `Example` protos from TFRecord files.)"
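+        ,
+        "\n",
+        "\n",
+        "Before mapping the parsing function over the whole dataset, you can optionally decode a single raw record into a `tf.train.Example` proto to see exactly which feature names it carries. A minimal, illustrative check:\n",
+        "\n",
+        "```python\n",
+        "# Decode one serialized record into an Example proto and list its features.\n",
+        "raw_record = next(iter(train_dataset))\n",
+        "example = tf.train.Example.FromString(raw_record.numpy())\n",
+        "print(list(example.features.feature.keys()))\n",
+        "```"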
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "x2Q0g3fBj2kD",
+        "colab_type": "code",
+        "cellView": "code",
+        "colab": {}
+      },
+      "source": [
+        "def parse_tfrecord(example_proto):\n",
+        "  \"\"\"The parsing function.\n",
+        "\n",
+        "  Read a serialized example into the structure defined by features_dict.\n",
+        "\n",
+        "  Args:\n",
+        "    example_proto: a serialized Example.\n",
+        "\n",
+        "  Returns:\n",
+        "    A tuple of the predictors dictionary and the label, cast to an `int32`.\n",
+        "  \"\"\"\n",
+        "  parsed_features = tf.io.parse_single_example(example_proto, features_dict)\n",
+        "  labels = parsed_features.pop(LABEL)\n",
+        "  return parsed_features, tf.cast(labels, tf.int32)\n",
+        "\n",
+        "# Map the function over the dataset.\n",
+        "parsed_dataset = train_dataset.map(parse_tfrecord, num_parallel_calls=5)\n",
+        "\n",
+        "# Print the first parsed record to check.\n",
+        "pprint(iter(parsed_dataset).next())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Nb8EyNT4Xnhb",
+        "colab_type": "text"
+      },
+      "source": [
+        "Note that each record of the parsed dataset contains a tuple. The first element of the tuple is a dictionary with bands for keys and the numeric value of the bands for values. The second element of the tuple is a class label."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "xLCsxWOuEBmE",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Create additional features\n",
+        "\n",
+        "Another thing we might want to do as part of the input process is to create new features, for example NDVI, a vegetation index computed from reflectance in two spectral bands. Here are some helper functions for that."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "lT6v2RM_EB1E",
+        "colab_type": "code",
+        "cellView": "code",
+        "colab": {}
+      },
+      "source": [
+        "def normalized_difference(a, b):\n",
+        "  \"\"\"Compute normalized difference of two inputs.\n",
+        "\n",
+        "  Compute (a - b) / (a + b). If the denominator is zero, add a small delta.\n",
+        "\n",
+        "  Args:\n",
+        "    a: an input tensor with shape=[1]\n",
+        "    b: an input tensor with shape=[1]\n",
+        "\n",
+        "  Returns:\n",
+        "    The normalized difference as a tensor.\n",
+        "  \"\"\"\n",
+        "  nd = (a - b) / (a + b)\n",
+        "  nd_inf = (a - b) / (a + b + 0.000001)\n",
+        "  return tf.where(tf.math.is_finite(nd), nd, nd_inf)\n",
+        "\n",
+        "def add_NDVI(features, label):\n",
+        "  \"\"\"Add NDVI to the dataset.\n",
+        "\n",
+        "  Args:\n",
+        "    features: a dictionary of input tensors keyed by feature name.\n",
+        "    label: the target label.\n",
+        "\n",
+        "  Returns:\n",
+        "    A tuple of the input dictionary with an NDVI tensor added and the label.\n",
+        "  \"\"\"\n",
+        "  features['NDVI'] = normalized_difference(features['B5'], features['B4'])\n",
+        "  return features, label"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nEx1RAXOZQkS",
+        "colab_type": "text"
+      },
+      "source": [
+        "# Model setup\n",
+        "\n",
+        "The basic workflow for classification in TensorFlow is:\n",
+        "\n",
+        "1. Create the model.\n",
+        "2. Train the model (i.e. `fit()`).\n",
+        "3. Use the trained model for inference (i.e. `predict()`).\n",
+        "\n",
+        "Here we'll create a `Sequential` neural network model using Keras. 
This simple model is inspired by examples in:\n", + "\n", + "* [The TensorFlow Get Started tutorial](https://www.tensorflow.org/tutorials/)\n", + "* [The TensorFlow Keras guide](https://www.tensorflow.org/guide/keras#build_a_simple_model)\n", + "* [The Keras `Sequential` model examples](https://keras.io/getting-started/sequential-model-guide/#multilayer-perceptron-mlp-for-multi-class-softmax-classification)\n", + "\n", + "Note that the model used here is purely for demonstration purposes and hasn't gone through any performance tuning." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t9pWa54oG-xl", + "colab_type": "text" + }, + "source": [ + "## Create the Keras model\n", + "\n", + "Before we create the model, there's still a wee bit of pre-processing to get the data into the right input shape and a format that can be used with cross-entropy loss. Specifically, Keras expects a list of inputs and a one-hot vector for the class. (See [the Keras loss function docs](https://keras.io/losses/), [the TensorFlow categorical identity docs](https://www.tensorflow.org/guide/feature_columns#categorical_identity_column) and [the `tf.one_hot` docs](https://www.tensorflow.org/api_docs/python/tf/one_hot) for details). \n", + "\n", + "Here we will use a simple neural network model with a 64 node hidden layer, a dropout layer and an output layer. Once the dataset has been prepared, define the model, compile it, fit it to the training data. See [the Keras `Sequential` model guide](https://keras.io/getting-started/sequential-model-guide/) for more details." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OCZq3VNpG--G", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "from tensorflow import keras\n", + "\n", + "# Add NDVI.\n", + "input_dataset = parsed_dataset.map(add_NDVI)\n", + "\n", + "# Keras requires inputs as a tuple. Note that the inputs must be in the\n", + "# right shape. Also note that to use the categorical_crossentropy loss,\n", + "# the label needs to be turned into a one-hot vector.\n", + "def to_tuple(inputs, label):\n", + " return (tf.transpose(list(inputs.values())),\n", + " tf.one_hot(indices=label, depth=N_CLASSES))\n", + "\n", + "# Map the to_tuple function, shuffle and batch.\n", + "input_dataset = input_dataset.map(to_tuple).batch(8)\n", + "\n", + "# Define the layers in the model.\n", + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Dense(64, activation=tf.nn.relu),\n", + " tf.keras.layers.Dropout(0.2),\n", + " tf.keras.layers.Dense(N_CLASSES, activation=tf.nn.softmax)\n", + "])\n", + "\n", + "# Compile the model with the specified loss function.\n", + "model.compile(optimizer=tf.keras.optimizers.Adam(),\n", + " loss='categorical_crossentropy',\n", + " metrics=['accuracy'])\n", + "\n", + "# Fit the model to the training data.\n", + "model.fit(x=input_dataset, epochs=10)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pa4ex_4eKiyb", + "colab_type": "text" + }, + "source": [ + "## Check model accuracy on the test set\n", + "\n", + "Now that we have a trained model, we can evaluate it using the test dataset. To do that, read and prepare the test dataset in the same way as the training dataset. Here we specify a batch size of 1 so that each example in the test set is used exactly once to compute model accuracy. For model steps, just specify a number larger than the test dataset size (ignore the warning)." 
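+        ,
+        "\n",
+        "\n",
+        "Beyond overall accuracy, a confusion matrix can show which classes get mixed up. A minimal sketch, assuming the `test_dataset` defined in the next cell and a test set small enough to hold in memory:\n",
+        "\n",
+        "```python\n",
+        "import numpy as np\n",
+        "\n",
+        "# True labels from the one-hot targets and predicted labels from the model.\n",
+        "y_true = np.concatenate([np.argmax(y, axis=-1).flatten() for _, y in test_dataset])\n",
+        "y_pred = np.argmax(model.predict(test_dataset), axis=-1).flatten()\n",
+        "print(tf.math.confusion_matrix(y_true, y_pred))\n",
+        "```"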
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tE6d7FsrMa1p", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "test_dataset = (\n", + " tf.data.TFRecordDataset(TEST_FILE_PATH, compression_type='GZIP')\n", + " .map(parse_tfrecord, num_parallel_calls=5)\n", + " .map(add_NDVI)\n", + " .map(to_tuple)\n", + " .batch(1))\n", + "\n", + "model.evaluate(test_dataset)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nhHrnv3VR0DU", + "colab_type": "text" + }, + "source": [ + "# Use the trained model to classify an image from Earth Engine\n", + "\n", + "Now it's time to classify the image that was exported from Earth Engine. If the exported image is large, it will be split into multiple TFRecord files in its destination folder. There will also be a JSON sidecar file called \"the mixer\" that describes the format and georeferencing of the image. Here we will find the image files and the mixer file, getting some info out of the mixer that will be useful during model inference." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nmTayDitZgQ5", + "colab_type": "text" + }, + "source": [ + "## Find the image files and JSON mixer file in Cloud Storage\n", + "\n", + "Use `gsutil` to locate the files of interest in the output Cloud Storage bucket. Check to make sure your image export task finished before running the following." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oUv9WMpcVp8E", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Get a list of all the files in the output bucket.\n", + "files_list = !gsutil ls 'gs://'{OUTPUT_BUCKET}\n", + "# Get only the files generated by the image export.\n", + "exported_files_list = [s for s in files_list if IMAGE_FILE_PREFIX in s]\n", + "\n", + "# Get the list of image files and the JSON mixer file.\n", + "image_files_list = []\n", + "json_file = None\n", + "for f in exported_files_list:\n", + " if f.endswith('.tfrecord.gz'):\n", + " image_files_list.append(f)\n", + " elif f.endswith('.json'):\n", + " json_file = f\n", + "\n", + "# Make sure the files are in the right order.\n", + "image_files_list.sort()\n", + "\n", + "pprint(image_files_list)\n", + "print(json_file)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RcjYG9fk53xL", + "colab_type": "text" + }, + "source": [ + "## Read the JSON mixer file\n", + "\n", + "The mixer contains metadata and georeferencing information for the exported patches, each of which is in a different file. Read the mixer to get some information needed for prediction." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Gn7Dr0AAd93_", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import json\n", + "\n", + "# Load the contents of the mixer file to a JSON object.\n", + "json_text = !gsutil cat {json_file}\n", + "# Get a single string w/ newlines from the IPython.utils.text.SList\n", + "mixer = json.loads(json_text.nlstr)\n", + "pprint(mixer)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xyzyPPJwpVI", + "colab_type": "text" + }, + "source": [ + "## Read the image files into a dataset\n", + "\n", + "You can feed the list of files (`imageFilesList`) directly to the `TFRecordDataset` constructor to make a combined dataset on which to perform inference. The input needs to be preprocessed differently than the training and testing. 
Mainly, this is because the pixels are written into records as patches, we need to read the patches in as one big tensor (one patch for each band), then flatten them into lots of little tensors." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tn8Kj3VfwpiJ", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "# Get relevant info from the JSON mixer file.\n", + "patch_width = mixer['patchDimensions'][0]\n", + "patch_height = mixer['patchDimensions'][1]\n", + "patches = mixer['totalPatches']\n", + "patch_dimensions_flat = [patch_width * patch_height, 1]\n", + "\n", + "# Note that the tensors are in the shape of a patch, one patch for each band.\n", + "image_columns = [\n", + " tf.io.FixedLenFeature(shape=patch_dimensions_flat, dtype=tf.float32) \n", + " for k in BANDS\n", + "]\n", + "\n", + "# Parsing dictionary.\n", + "image_features_dict = dict(zip(BANDS, image_columns))\n", + "\n", + "# Note that you can make one dataset from many files by specifying a list.\n", + "image_dataset = tf.data.TFRecordDataset(image_files_list, compression_type='GZIP')\n", + "\n", + "# Parsing function.\n", + "def parse_image(example_proto):\n", + " return tf.io.parse_single_example(example_proto, image_features_dict)\n", + "\n", + "# Parse the data into tensors, one long tensor per patch.\n", + "image_dataset = image_dataset.map(parse_image, num_parallel_calls=5)\n", + "\n", + "# Break our long tensors into many little ones.\n", + "image_dataset = image_dataset.flat_map(\n", + " lambda features: tf.data.Dataset.from_tensor_slices(features)\n", + ")\n", + "\n", + "# Add additional features (NDVI).\n", + "image_dataset = image_dataset.map(\n", + " # Add NDVI to a feature that doesn't have a label.\n", + " lambda features: add_NDVI(features, None)[0]\n", + ")\n", + "\n", + "# Turn the dictionary in each record into a tuple without a label.\n", + "image_dataset = image_dataset.map(\n", + " lambda data_dict: (tf.transpose(list(data_dict.values())), )\n", + ")\n", + "\n", + "# Turn each patch into a batch.\n", + "image_dataset = image_dataset.batch(patch_width * patch_height)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_2sfRemRRDkV", + "colab_type": "text" + }, + "source": [ + "## Generate predictions for the image pixels\n", + "\n", + "To get predictions in each pixel, run the image dataset through the trained model using `model.predict()`. Print the first prediction to see that the output is a list of the three class probabilities for each pixel. Running all predictions might take a while." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8VGhmiP_REBP", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Run prediction in batches, with as many steps as there are patches.\n", + "predictions = model.predict(image_dataset, steps=patches, verbose=1)\n", + "\n", + "# Note that the predictions come as a numpy array. Check the first one.\n", + "print(predictions[0])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bPU2VlPOikAy", + "colab_type": "text" + }, + "source": [ + "## Write the predictions to a TFRecord file\n", + "\n", + "Now that there's a list of class probabilities in `predictions`, it's time to write them back into a file, optionally including a class label which is simply the index of the maximum probability. 
We'll write directly from TensorFlow to a file in the output Cloud Storage bucket.\n", + "\n", + "Iterate over the list, compute class label and write the class and the probabilities in patches. Specifically, we need to write the pixels into the file as patches in the same order they came out. The records are written as serialized `tf.train.Example` protos. This might take a while." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AkorbsEHepzJ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "print('Writing to file ' + OUTPUT_IMAGE_FILE)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "kATMknHc0qeR", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "# Instantiate the writer.\n", + "writer = tf.io.TFRecordWriter(OUTPUT_IMAGE_FILE)\n", + "\n", + "# Every patch-worth of predictions we'll dump an example into the output\n", + "# file with a single feature that holds our predictions. Since our predictions\n", + "# are already in the order of the exported data, the patches we create here\n", + "# will also be in the right order.\n", + "patch = [[], [], [], []]\n", + "cur_patch = 1\n", + "for prediction in predictions:\n", + " patch[0].append(tf.argmax(prediction, 1))\n", + " patch[1].append(prediction[0][0])\n", + " patch[2].append(prediction[0][1])\n", + " patch[3].append(prediction[0][2])\n", + " # Once we've seen a patches-worth of class_ids...\n", + " if (len(patch[0]) == patch_width * patch_height):\n", + " print('Done with patch ' + str(cur_patch) + ' of ' + str(patches) + '...')\n", + " # Create an example\n", + " example = tf.train.Example(\n", + " features=tf.train.Features(\n", + " feature={\n", + " 'prediction': tf.train.Feature(\n", + " int64_list=tf.train.Int64List(\n", + " value=patch[0])),\n", + " 'bareProb': tf.train.Feature(\n", + " float_list=tf.train.FloatList(\n", + " value=patch[1])),\n", + " 'vegProb': tf.train.Feature(\n", + " float_list=tf.train.FloatList(\n", + " value=patch[2])),\n", + " 'waterProb': tf.train.Feature(\n", + " float_list=tf.train.FloatList(\n", + " value=patch[3])),\n", + " }\n", + " )\n", + " )\n", + " # Write the example to the file and clear our patch array so it's ready for\n", + " # another batch of class ids\n", + " writer.write(example.SerializeToString())\n", + " patch = [[], [], [], []]\n", + " cur_patch += 1\n", + "\n", + "writer.close()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1K_1hKs0aBdA", + "colab_type": "text" + }, + "source": [ + "# Upload the classifications to an Earth Engine asset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M6sNZXWOSa82", + "colab_type": "text" + }, + "source": [ + "## Verify the existence of the predictions file\n", + "\n", + "At this stage, there should be a predictions TFRecord file sitting in the output Cloud Storage bucket. Use the `gsutil` command to verify that the predictions image (and associated mixer JSON) exist and have non-zero size." 
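+        ,
+        "\n",
+        "\n",
+        "If you prefer to stay in Python, an equivalent check with `tf.io.gfile` (the same API used earlier to check the training and testing files) might look like this:\n",
+        "\n",
+        "```python\n",
+        "# Check for the predictions file and report its size in bytes.\n",
+        "if tf.io.gfile.exists(OUTPUT_IMAGE_FILE):\n",
+        "  print(OUTPUT_IMAGE_FILE, tf.io.gfile.stat(OUTPUT_IMAGE_FILE).length, 'bytes')\n",
+        "else:\n",
+        "  print('No predictions file found.')\n",
+        "```"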
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6ZVWDPefUCgA", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!gsutil ls -l {OUTPUT_IMAGE_FILE}" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ZyCo297Clcx", + "colab_type": "text" + }, + "source": [ + "## Upload the classified image to Earth Engine\n", + "\n", + "Upload the image to Earth Engine directly from the Cloud Storage bucket with the [`earthengine` command](https://developers.google.com/earth-engine/command_line#upload). Provide both the image TFRecord file and the JSON file as arguments to `earthengine upload`." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NXulMNl9lTDv", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "print('Uploading to ' + OUTPUT_ASSET_ID)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "V64tcVxsO5h6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Start the upload.\n", + "!earthengine upload image --asset_id={OUTPUT_ASSET_ID} --pyramiding_policy=mode {OUTPUT_IMAGE_FILE} {json_file}" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yt4HyhUU_Bal", + "colab_type": "text" + }, + "source": [ + "## Check the status of the asset ingestion\n", + "\n", + "You can also use the Earth Engine API to check the status of your asset upload. It might take a while. The upload of the image is an asset ingestion task." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_vB-gwGhl_3C", + "colab_type": "code", + "cellView": "code", + "colab": {} + }, + "source": [ + "ee.batch.Task.list()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vvXvy9GDhM-p", + "colab_type": "text" + }, + "source": [ + "## View the ingested asset\n", + "\n", + "Display the vector of class probabilities as an RGB image with colors corresponding to the probability of bare, vegetation, water in a pixel. Also display the winning class using the same color palette." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kEkVxIyJiFd4", + "colab_type": "code", + "colab": {} + }, + "source": [ + "predictions_image = ee.Image(OUTPUT_ASSET_ID)\n", + "\n", + "prediction_vis = {\n", + " 'bands': 'prediction',\n", + " 'min': 0,\n", + " 'max': 2,\n", + " 'palette': ['red', 'green', 'blue']\n", + "}\n", + "probability_vis = {'bands': ['bareProb', 'vegProb', 'waterProb'], 'max': 0.5}\n", + "\n", + "prediction_map_id = predictions_image.getMapId(prediction_vis)\n", + "probability_map_id = predictions_image.getMapId(probability_vis)\n", + "\n", + "map = folium.Map(location=[37.6413, -122.2582])\n", + "folium.TileLayer(\n", + " tiles=prediction_map_id['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='prediction',\n", + ").add_to(map)\n", + "folium.TileLayer(\n", + " tiles=probability_map_id['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='probability',\n", + ").add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + } + ] +} diff --git a/guides/linked/UNET_regression_demo.ipynb b/guides/linked/UNET_regression_demo.ipynb index e4453d9a8..0c557efb7 100644 --- a/guides/linked/UNET_regression_demo.ipynb +++ b/guides/linked/UNET_regression_demo.ipynb @@ -1 +1,1036 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"UNET_regression_demo.ipynb","provenance":[{"file_id":"https://github.com/google/earthengine-community/blob/master/guides/linked/UNET_regression_demo.ipynb","timestamp":1586992475463}],"private_outputs":true,"collapsed_sections":[],"toc_visible":true,"machine_shape":"hm"},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"id":"esIMGVxhDI0f","colab_type":"code","colab":{}},"source":["#@title Copyright 2020 Google LLC. { display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aV1xZ1CPi3Nw","colab_type":"text"},"source":["
\n","\n"," Run in Google Colab\n","\n"," View source on GitHub
"]},{"cell_type":"markdown","metadata":{"id":"_SHAc5qbiR8l","colab_type":"text"},"source":["# Introduction\n","\n","This is an Earth Engine <> TensorFlow demonstration notebook. Suppose you want to predict a continuous output (regression) from a stack of continuous inputs. In this example, the output is impervious surface area from [NLCD](https://www.mrlc.gov/data) and the input is a Landsat 8 composite. The model is a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597). This notebook shows:\n","\n","1. Exporting training/testing patches from Earth Engine, suitable for training an FCNN model.\n","2. Preprocessing.\n","3. Training and validating an FCNN model.\n","4. Making predictions with the trained model and importing them to Earth Engine."]},{"cell_type":"markdown","metadata":{"id":"_MJ4kW1pEhwP","colab_type":"text"},"source":["# Setup software libraries\n","\n","Authenticate and import as necessary."]},{"cell_type":"code","metadata":{"id":"neIa46CpciXq","colab_type":"code","colab":{}},"source":["# Cloud authentication.\n","from google.colab import auth\n","auth.authenticate_user()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"jat01FEoUMqg","colab_type":"code","colab":{}},"source":["# Import, authenticate and initialize the Earth Engine library.\n","import ee\n","ee.Authenticate()\n","ee.Initialize()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"8RnZzcYhcpsQ","colab_type":"code","colab":{}},"source":["# Tensorflow setup.\n","import tensorflow as tf\n","print(tf.__version__)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"n1hFdpBQfyhN","colab_type":"code","colab":{}},"source":["# Folium setup.\n","import folium\n","print(folium.__version__)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"iT8ycmzClYwf","colab_type":"text"},"source":["# Variables\n","\n","Declare the variables that will be in use throughout the notebook."]},{"cell_type":"markdown","metadata":{"id":"qKs6HuxOzjMl","colab_type":"text"},"source":["## Specify your Cloud Storage Bucket\n","You must have write access to a bucket to run this demo! To run it read-only, use the demo bucket below, but note that writes to this bucket will not work."]},{"cell_type":"code","metadata":{"id":"obDDH1eDzsch","colab_type":"code","colab":{}},"source":["# INSERT YOUR BUCKET HERE:\n","BUCKET = 'your-bucket-name'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wmfKLl9XcnGJ","colab_type":"text"},"source":["## Set other global variables"]},{"cell_type":"code","metadata":{"id":"psz7wJKalaoj","colab_type":"code","colab":{}},"source":["# Specify names locations for outputs in Cloud Storage. 
\n","FOLDER = 'fcnn-demo'\n","TRAINING_BASE = 'training_patches'\n","EVAL_BASE = 'eval_patches'\n","\n","# Specify inputs (Landsat bands) to the model and the response variable.\n","opticalBands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']\n","thermalBands = ['B10', 'B11']\n","BANDS = opticalBands + thermalBands\n","RESPONSE = 'impervious'\n","FEATURES = BANDS + [RESPONSE]\n","\n","# Specify the size and shape of patches expected by the model.\n","KERNEL_SIZE = 256\n","KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n","COLUMNS = [\n"," tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n","]\n","FEATURES_DICT = dict(zip(FEATURES, COLUMNS))\n","\n","# Sizes of the training and evaluation datasets.\n","TRAIN_SIZE = 16000\n","EVAL_SIZE = 8000\n","\n","# Specify model training parameters.\n","BATCH_SIZE = 16\n","EPOCHS = 10\n","BUFFER_SIZE = 2000\n","OPTIMIZER = 'SGD'\n","LOSS = 'MeanSquaredError'\n","METRICS = ['RootMeanSquaredError']"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hgoDc7Hilfc4","colab_type":"text"},"source":["# Imagery\n","\n","Gather and setup the imagery to use for inputs (predictors). This is a three-year, cloud-free, Landsat 8 composite. Display it in the notebook for a sanity check."]},{"cell_type":"code","metadata":{"id":"-IlgXu-vcUEY","colab_type":"code","colab":{}},"source":["# Use Landsat 8 surface reflectance data.\n","l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\n","\n","# Cloud masking function.\n","def maskL8sr(image):\n"," cloudShadowBitMask = ee.Number(2).pow(3).int()\n"," cloudsBitMask = ee.Number(2).pow(5).int()\n"," qa = image.select('pixel_qa')\n"," mask1 = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(\n"," qa.bitwiseAnd(cloudsBitMask).eq(0))\n"," mask2 = image.mask().reduce('min')\n"," mask3 = image.select(opticalBands).gt(0).And(\n"," image.select(opticalBands).lt(10000)).reduce('min')\n"," mask = mask1.And(mask2).And(mask3)\n"," return image.select(opticalBands).divide(10000).addBands(\n"," image.select(thermalBands).divide(10).clamp(273.15, 373.15)\n"," .subtract(273.15).divide(100)).updateMask(mask)\n","\n","# The image input data is a cloud-masked median composite.\n","image = l8sr.filterDate('2015-01-01', '2017-12-31').map(maskL8sr).median()\n","\n","# Use folium to visualize the imagery.\n","mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n","map = folium.Map(location=[38., -122.5])\n","folium.TileLayer(\n"," tiles=mapid['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='median composite',\n"," ).add_to(map)\n","\n","mapid = image.getMapId({'bands': ['B10'], 'min': 0, 'max': 0.5})\n","folium.TileLayer(\n"," tiles=mapid['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='thermal',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gHznnctkJsZJ","colab_type":"text"},"source":["Prepare the response (what we want to predict). This is impervious surface area (in fraction of a pixel) from the 2016 NLCD dataset. 
Display to check."]},{"cell_type":"code","metadata":{"id":"e0wHDyxVirec","colab_type":"code","colab":{}},"source":["nlcd = ee.Image('USGS/NLCD/NLCD2016').select('impervious')\n","nlcd = nlcd.divide(100).float()\n","\n","mapid = nlcd.getMapId({'min': 0, 'max': 1})\n","map = folium.Map(location=[38., -122.5])\n","folium.TileLayer(\n"," tiles=mapid['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='nlcd impervious',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CTS7_ZzPDhhg","colab_type":"text"},"source":["Stack the 2D images (Landsat composite and NLCD impervious surface) to create a single image from which samples can be taken. Convert the image into an array image in which each pixel stores 256x256 patches of pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points."]},{"cell_type":"code","metadata":{"id":"eGHYsdAOipa4","colab_type":"code","colab":{}},"source":["featureStack = ee.Image.cat([\n"," image.select(BANDS),\n"," nlcd.select(RESPONSE)\n","]).float()\n","\n","list = ee.List.repeat(1, KERNEL_SIZE)\n","lists = ee.List.repeat(list, KERNEL_SIZE)\n","kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n","\n","arrays = featureStack.neighborhoodToArray(kernel)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"F4djSxBRG2el","colab_type":"text"},"source":["Use some pre-made geometries to sample the stack in strategic locations. Specifically, these are hand-made polygons in which to take the 256x256 samples. Display the sampling polygons on a map, red for training polygons, blue for evaluation."]},{"cell_type":"code","metadata":{"id":"ure_WaD0itQY","colab_type":"code","colab":{}},"source":["trainingPolys = ee.FeatureCollection('projects/google/DemoTrainingGeometries')\n","evalPolys = ee.FeatureCollection('projects/google/DemoEvalGeometries')\n","\n","polyImage = ee.Image(0).byte().paint(trainingPolys, 1).paint(evalPolys, 2)\n","polyImage = polyImage.updateMask(polyImage)\n","\n","mapid = polyImage.getMapId({'min': 1, 'max': 2, 'palette': ['red', 'blue']})\n","map = folium.Map(location=[38., -100.], zoom_start=5)\n","folium.TileLayer(\n"," tiles=mapid['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='training polygons',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ZV890gPHeZqz","colab_type":"text"},"source":["# Sampling\n","\n","The mapped data look reasonable so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. 
Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export."]},{"cell_type":"code","metadata":{"id":"FyRpvwENxE-A","colab_type":"code","cellView":"both","colab":{}},"source":["# Convert the feature collections to lists for iteration.\n","trainingPolysList = trainingPolys.toList(trainingPolys.size())\n","evalPolysList = evalPolys.toList(evalPolys.size())\n","\n","# These numbers determined experimentally.\n","n = 200 # Number of shards in each polygon.\n","N = 2000 # Total sample size in each polygon.\n","\n","# Export all the training data (in many pieces), with one task \n","# per geometry.\n","for g in range(trainingPolys.size().getInfo()):\n"," geomSample = ee.FeatureCollection([])\n"," for i in range(n):\n"," sample = arrays.sample(\n"," region = ee.Feature(trainingPolysList.get(g)).geometry(), \n"," scale = 30,\n"," numPixels = N / n, # Size of the shard.\n"," seed = i,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n","\n"," desc = TRAINING_BASE + '_g' + str(g)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = geomSample,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + desc,\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n","# Export all the evaluation data.\n","for g in range(evalPolys.size().getInfo()):\n"," geomSample = ee.FeatureCollection([])\n"," for i in range(n):\n"," sample = arrays.sample(\n"," region = ee.Feature(evalPolysList.get(g)).geometry(), \n"," scale = 30,\n"," numPixels = N / n,\n"," seed = i,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n","\n"," desc = EVAL_BASE + '_g' + str(g)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = geomSample,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + desc,\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rWXrvBE4607G","colab_type":"text"},"source":["# Training data\n","\n","Load the data exported from Earth Engine into a `tf.data.Dataset`. 
The following are helper functions for that."]},{"cell_type":"code","metadata":{"id":"WWZ0UXCVMyJP","colab_type":"code","colab":{}},"source":["def parse_tfrecord(example_proto):\n"," \"\"\"The parsing function.\n"," Read a serialized example into the structure defined by FEATURES_DICT.\n"," Args:\n"," example_proto: a serialized Example.\n"," Returns:\n"," A dictionary of tensors, keyed by feature name.\n"," \"\"\"\n"," return tf.io.parse_single_example(example_proto, FEATURES_DICT)\n","\n","\n","def to_tuple(inputs):\n"," \"\"\"Function to convert a dictionary of tensors to a tuple of (inputs, outputs).\n"," Turn the tensors returned by parse_tfrecord into a stack in HWC shape.\n"," Args:\n"," inputs: A dictionary of tensors, keyed by feature name.\n"," Returns:\n"," A tuple of (inputs, outputs).\n"," \"\"\"\n"," inputsList = [inputs.get(key) for key in FEATURES]\n"," stacked = tf.stack(inputsList, axis=0)\n"," # Convert from CHW to HWC\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]\n","\n","\n","def get_dataset(pattern):\n"," \"\"\"Function to read, parse and format to tuple a set of input tfrecord files.\n"," Get all the files matching the pattern, parse and convert to tuple.\n"," Args:\n"," pattern: A file pattern to match in a Cloud Storage bucket.\n"," Returns:\n"," A tf.data.Dataset\n"," \"\"\"\n"," glob = tf.io.gfile.glob(pattern)\n"," dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')\n"," dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)\n"," dataset = dataset.map(to_tuple, num_parallel_calls=5)\n"," return dataset"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Xg1fa18336D2","colab_type":"text"},"source":["Use the helpers to read in the training dataset. Print the first record to check."]},{"cell_type":"code","metadata":{"id":"rm0qRF0fAYcC","colab_type":"code","colab":{}},"source":["def get_training_dataset():\n","\t\"\"\"Get the preprocessed training dataset\n"," Returns: \n"," A tf.data.Dataset of training data.\n"," \"\"\"\n","\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '*'\n","\tdataset = get_dataset(glob)\n","\tdataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()\n","\treturn dataset\n","\n","training = get_training_dataset()\n","\n","print(iter(training.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"j-cQO5RL6vob","colab_type":"text"},"source":["# Evaluation data\n","\n","Now do the same thing to get an evaluation dataset. Note that unlike the training dataset, the evaluation dataset has a batch size of 1, is not repeated and is not shuffled."]},{"cell_type":"code","metadata":{"id":"ieKTCGiJ6xzo","colab_type":"code","colab":{}},"source":["def get_eval_dataset():\n","\t\"\"\"Get the preprocessed evaluation dataset\n"," Returns: \n"," A tf.data.Dataset of evaluation data.\n"," \"\"\"\n","\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + EVAL_BASE + '*'\n","\tdataset = get_dataset(glob)\n","\tdataset = dataset.batch(1).repeat()\n","\treturn dataset\n","\n","evaluation = get_eval_dataset()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"9JIE7Yl87lgU","colab_type":"text"},"source":["# Model\n","\n","Here we use the Keras implementation of the U-Net model. The U-Net model takes 256x256 pixel patches as input and outputs per-pixel class probability, label or a continuous output. 
We can implement the model essentially unmodified, but will use mean squared error loss on the sigmoidal output since we are treating this as a regression problem, rather than a classification problem. Since impervious surface fraction is constrained to [0,1], with many values close to zero or one, a saturating activation function is suitable here."]},{"cell_type":"code","metadata":{"id":"wsnnnz56yS3l","colab_type":"code","colab":{}},"source":["from tensorflow.python.keras import layers\n","from tensorflow.python.keras import losses\n","from tensorflow.python.keras import models\n","from tensorflow.python.keras import metrics\n","from tensorflow.python.keras import optimizers\n","\n","def conv_block(input_tensor, num_filters):\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\treturn encoder\n","\n","def encoder_block(input_tensor, num_filters):\n","\tencoder = conv_block(input_tensor, num_filters)\n","\tencoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)\n","\treturn encoder_pool, encoder\n","\n","def decoder_block(input_tensor, concat_tensor, num_filters):\n","\tdecoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)\n","\tdecoder = layers.concatenate([concat_tensor, decoder], axis=-1)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\treturn decoder\n","\n","def get_model():\n","\tinputs = layers.Input(shape=[None, None, len(BANDS)]) # 256\n","\tencoder0_pool, encoder0 = encoder_block(inputs, 32) # 128\n","\tencoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64\n","\tencoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32\n","\tencoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16\n","\tencoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 8\n","\tcenter = conv_block(encoder4_pool, 1024) # center\n","\tdecoder4 = decoder_block(center, encoder4, 512) # 16\n","\tdecoder3 = decoder_block(decoder4, encoder3, 256) # 32\n","\tdecoder2 = decoder_block(decoder3, encoder2, 128) # 64\n","\tdecoder1 = decoder_block(decoder2, encoder1, 64) # 128\n","\tdecoder0 = decoder_block(decoder1, encoder0, 32) # 256\n","\toutputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)\n","\n","\tmodel = models.Model(inputs=[inputs], outputs=[outputs])\n","\n","\tmodel.compile(\n","\t\toptimizer=optimizers.get(OPTIMIZER), \n","\t\tloss=losses.get(LOSS),\n","\t\tmetrics=[metrics.get(metric) for metric in METRICS])\n","\n","\treturn model"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uu_E7OTDBCoS","colab_type":"text"},"source":["# Training the model\n","\n","You train a Keras model by calling `.fit()` on it. Here we're going to train for 10 epochs, which is suitable for demonstration purposes. 
For production use, you probably want to optimize this parameter, for example through [hyperparamter tuning](https://cloud.google.com/ml-engine/docs/tensorflow/using-hyperparameter-tuning)."]},{"cell_type":"code","metadata":{"id":"NzzaWxOhSxBy","colab_type":"code","colab":{}},"source":["m = get_model()\n","\n","m.fit(\n"," x=training, \n"," epochs=EPOCHS, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=EVAL_SIZE)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"U2XrwZHp66j4","colab_type":"text"},"source":["Note that the notebook VM is sometimes not heavy-duty enough to get through a whole training job, especially if you have a large buffer size or a large number of epochs. You can still use this notebook for training, but may need to set up an alternative VM ([learn more](https://research.google.com/colaboratory/local-runtimes.html)) for production use. Alternatively, you can package your code for running large training jobs on Google's AI Platform [as described here](https://cloud.google.com/ml-engine/docs/tensorflow/trainer-considerations). The following code loads a pre-trained model, which you can use for predictions right away."]},{"cell_type":"code","metadata":{"id":"-RJpNfEUS1qp","colab_type":"code","colab":{}},"source":["# Load a trained model. 50 epochs. 25 hours. Final RMSE ~0.08.\n","MODEL_DIR = 'gs://ee-docs-demos/fcnn-demo/trainer/model'\n","m = tf.keras.models.load_model(MODEL_DIR)\n","m.summary()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"J1ySNup0xCqN","colab_type":"text"},"source":["# Prediction\n","\n","The prediction pipeline is:\n","\n","1. Export imagery on which to do predictions from Earth Engine in TFRecord format to a Cloud Storage bucket.\n","2. Use the trained model to make the predictions.\n","3. Write the predictions to a TFRecord file in a Cloud Storage.\n","4. Upload the predictions TFRecord file to Earth Engine.\n","\n","The following functions handle this process. It's useful to separate the export from the predictions so that you can experiment with different models without running the export every time."]},{"cell_type":"code","metadata":{"id":"M3WDAa-RUpXP","colab_type":"code","colab":{}},"source":["def doExport(out_image_base, kernel_buffer, region):\n"," \"\"\"Run the image export task. 
Block until complete.\n"," \"\"\"\n"," task = ee.batch.Export.image.toCloudStorage(\n"," image = image.select(BANDS),\n"," description = out_image_base,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + out_image_base,\n"," region = region.getInfo()['coordinates'],\n"," scale = 30,\n"," fileFormat = 'TFRecord',\n"," maxPixels = 1e10,\n"," formatOptions = {\n"," 'patchDimensions': KERNEL_SHAPE,\n"," 'kernelSize': kernel_buffer,\n"," 'compressed': True,\n"," 'maxFileSize': 104857600\n"," }\n"," )\n"," task.start()\n","\n"," # Block until the task completes.\n"," print('Running image export to Cloud Storage...')\n"," import time\n"," while task.active():\n"," time.sleep(30)\n","\n"," # Error condition\n"," if task.status()['state'] != 'COMPLETED':\n"," print('Error with image export.')\n"," else:\n"," print('Image export completed.')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zb_9_FflygVw","colab_type":"code","colab":{}},"source":["def doPrediction(out_image_base, user_folder, kernel_buffer, region):\n"," \"\"\"Perform inference on exported imagery, upload to Earth Engine.\n"," \"\"\"\n","\n"," print('Looking for TFRecord files...')\n","\n"," # Get a list of all the files in the output bucket.\n"," filesList = !gsutil ls 'gs://'{BUCKET}'/'{FOLDER}\n","\n"," # Get only the files generated by the image export.\n"," exportFilesList = [s for s in filesList if out_image_base in s]\n","\n"," # Get the list of image files and the JSON mixer file.\n"," imageFilesList = []\n"," jsonFile = None\n"," for f in exportFilesList:\n"," if f.endswith('.tfrecord.gz'):\n"," imageFilesList.append(f)\n"," elif f.endswith('.json'):\n"," jsonFile = f\n","\n"," # Make sure the files are in the right order.\n"," imageFilesList.sort()\n","\n"," from pprint import pprint\n"," pprint(imageFilesList)\n"," print(jsonFile)\n","\n"," import json\n"," # Load the contents of the mixer file to a JSON object.\n"," jsonText = !gsutil cat {jsonFile}\n"," # Get a single string w/ newlines from the IPython.utils.text.SList\n"," mixer = json.loads(jsonText.nlstr)\n"," pprint(mixer)\n"," patches = mixer['totalPatches']\n","\n"," # Get set up for prediction.\n"," x_buffer = int(kernel_buffer[0] / 2)\n"," y_buffer = int(kernel_buffer[1] / 2)\n","\n"," buffered_shape = [\n"," KERNEL_SHAPE[0] + kernel_buffer[0],\n"," KERNEL_SHAPE[1] + kernel_buffer[1]]\n","\n"," imageColumns = [\n"," tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32) \n"," for k in BANDS\n"," ]\n","\n"," imageFeaturesDict = dict(zip(BANDS, imageColumns))\n","\n"," def parse_image(example_proto):\n"," return tf.io.parse_single_example(example_proto, imageFeaturesDict)\n","\n"," def toTupleImage(inputs):\n"," inputsList = [inputs.get(key) for key in BANDS]\n"," stacked = tf.stack(inputsList, axis=0)\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," return stacked\n","\n"," # Create a dataset from the TFRecord file(s) in Cloud Storage.\n"," imageDataset = tf.data.TFRecordDataset(imageFilesList, compression_type='GZIP')\n"," imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)\n"," imageDataset = imageDataset.map(toTupleImage).batch(1)\n","\n"," # Perform inference.\n"," print('Running predictions...')\n"," predictions = m.predict(imageDataset, steps=patches, verbose=1)\n"," # print(predictions[0])\n","\n"," print('Writing predictions...')\n"," out_image_file = 'gs://' + BUCKET + '/' + FOLDER + '/' + out_image_base + '.TFRecord'\n"," writer = tf.io.TFRecordWriter(out_image_file)\n"," patches = 0\n"," for 
predictionPatch in predictions:\n"," print('Writing patch ' + str(patches) + '...')\n"," predictionPatch = predictionPatch[\n"," x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]\n","\n"," # Create an example.\n"," example = tf.train.Example(\n"," features=tf.train.Features(\n"," feature={\n"," 'impervious': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=predictionPatch.flatten()))\n"," }\n"," )\n"," )\n"," # Write the example.\n"," writer.write(example.SerializeToString())\n"," patches += 1\n","\n"," writer.close()\n","\n"," # Start the upload.\n"," out_image_asset = user_folder + '/' + out_image_base\n"," !earthengine upload image --asset_id={out_image_asset} {out_image_file} {jsonFile}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"LZqlymOehnQO","colab_type":"text"},"source":["Now there's all the code needed to run the prediction pipeline, all that remains is to specify the output region in which to do the prediction, the names of the output files, where to put them, and the shape of the outputs. In terms of the shape, the model is trained on 256x256 patches, but can work (in theory) on any patch that's big enough with even dimensions ([reference](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)). Because of tile boundary artifacts, give the model slightly larger patches for prediction, then clip out the middle 256x256 patch. This is controlled with a kernel buffer, half the size of which will extend beyond the kernel buffer. For example, specifying a 128x128 kernel will append 64 pixels on each side of the patch, to ensure that the pixels in the output are taken from inputs completely covered by the kernel."]},{"cell_type":"code","metadata":{"id":"FPANwc7B1-TS","colab_type":"code","colab":{}},"source":["# Output assets folder: YOUR FOLDER\n","user_folder = 'users/username' # INSERT YOUR FOLDER HERE.\n","\n","# Base file name to use for TFRecord files and assets.\n","bj_image_base = 'FCNN_demo_beijing_384_'\n","# Half this will extend on the sides of each patch.\n","bj_kernel_buffer = [128, 128]\n","# Beijing\n","bj_region = ee.Geometry.Polygon(\n"," [[[115.9662455210937, 40.121362012835235],\n"," [115.9662455210937, 39.64293313749715],\n"," [117.01818643906245, 39.64293313749715],\n"," [117.01818643906245, 40.121362012835235]]], None, False)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"lLNEOLkXWvSi","colab_type":"code","colab":{}},"source":["# Run the export.\n","doExport(bj_image_base, bj_kernel_buffer, bj_region)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"KxACnxKFrQ_J","colab_type":"code","colab":{}},"source":["# Run the prediction.\n","doPrediction(bj_image_base, user_folder, bj_kernel_buffer, bj_region)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uj_G9OZ1xH6K","colab_type":"text"},"source":["# Display the output\n","\n","One the data has been exported, the model has made predictions and the predictions have been written to a file, and the image imported to Earth Engine, it's possible to display the resultant Earth Engine asset. 
Here, display the impervious area predictions over Beijing, China."]},{"cell_type":"code","metadata":{"id":"Jgco6HJ4R5p2","colab_type":"code","colab":{}},"source":["out_image = ee.Image(user_folder + '/' + bj_image_base)\n","mapid = out_image.getMapId({'min': 0, 'max': 1})\n","map = folium.Map(location=[39.898, 116.5097])\n","folium.TileLayer(\n"," tiles=mapid['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='predicted impervious',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]}]} \ No newline at end of file +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "UNET_regression_demo.ipynb", + "provenance": [ + { + "file_id": "https://github.com/google/earthengine-community/blob/master/guides/linked/UNET_regression_demo.ipynb", + "timestamp": 1586992475463 + } + ], + "private_outputs": true, + "collapsed_sections": [], + "toc_visible": true, + "machine_shape": "hm" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "esIMGVxhDI0f", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#@title Copyright 2020 Google LLC. { display-mode: \"form\" }\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aV1xZ1CPi3Nw", + "colab_type": "text" + }, + "source": [ + "\u003ctable class=\"ee-notebook-buttons\" align=\"left\"\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/UNET_regression_demo.ipynb\"\u003e\n", + " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e Run in Google Colab\u003c/a\u003e\n", + "\u003c/td\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"https://github.com/google/earthengine-community/blob/master/guides/linked/UNET_regression_demo.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e View source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_SHAc5qbiR8l", + "colab_type": "text" + }, + "source": [ + "# Introduction\n", + "\n", + "This is an Earth Engine \u003c\u003e TensorFlow demonstration notebook. Suppose you want to predict a continuous output (regression) from a stack of continuous inputs. In this example, the output is impervious surface area from [NLCD](https://www.mrlc.gov/data) and the input is a Landsat 8 composite. The model is a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597). 
This notebook shows:\n", + "\n", + "1. Exporting training/testing patches from Earth Engine, suitable for training an FCNN model.\n", + "2. Preprocessing.\n", + "3. Training and validating an FCNN model.\n", + "4. Making predictions with the trained model and importing them to Earth Engine." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_MJ4kW1pEhwP", + "colab_type": "text" + }, + "source": [ + "# Setup software libraries\n", + "\n", + "Authenticate and import as necessary." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "neIa46CpciXq", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Cloud authentication.\n", + "from google.colab import auth\n", + "auth.authenticate_user()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jat01FEoUMqg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Import, authenticate and initialize the Earth Engine library.\n", + "import ee\n", + "ee.Authenticate()\n", + "ee.Initialize(project='my-project')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8RnZzcYhcpsQ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Tensorflow setup.\n", + "import tensorflow as tf\n", + "print(tf.__version__)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "n1hFdpBQfyhN", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Folium setup.\n", + "import folium\n", + "print(folium.__version__)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iT8ycmzClYwf", + "colab_type": "text" + }, + "source": [ + "# Variables\n", + "\n", + "Declare the variables that will be in use throughout the notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qKs6HuxOzjMl", + "colab_type": "text" + }, + "source": [ + "## Specify your Cloud Storage Bucket\n", + "You must have write access to a bucket to run this demo! To run it read-only, use the demo bucket below, but note that writes to this bucket will not work." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "obDDH1eDzsch", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# INSERT YOUR BUCKET HERE:\n", + "BUCKET = 'your-bucket-name'" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wmfKLl9XcnGJ", + "colab_type": "text" + }, + "source": [ + "## Set other global variables" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "psz7wJKalaoj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Specify names locations for outputs in Cloud Storage. 
\n", + "FOLDER = 'fcnn-demo'\n", + "TRAINING_BASE = 'training_patches'\n", + "EVAL_BASE = 'eval_patches'\n", + "\n", + "# Specify inputs (Landsat bands) to the model and the response variable.\n", + "opticalBands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']\n", + "thermalBands = ['B10', 'B11']\n", + "BANDS = opticalBands + thermalBands\n", + "RESPONSE = 'impervious'\n", + "FEATURES = BANDS + [RESPONSE]\n", + "\n", + "# Specify the size and shape of patches expected by the model.\n", + "KERNEL_SIZE = 256\n", + "KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n", + "COLUMNS = [\n", + " tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n", + "]\n", + "FEATURES_DICT = dict(zip(FEATURES, COLUMNS))\n", + "\n", + "# Sizes of the training and evaluation datasets.\n", + "TRAIN_SIZE = 16000\n", + "EVAL_SIZE = 8000\n", + "\n", + "# Specify model training parameters.\n", + "BATCH_SIZE = 16\n", + "EPOCHS = 10\n", + "BUFFER_SIZE = 2000\n", + "OPTIMIZER = 'SGD'\n", + "LOSS = 'MeanSquaredError'\n", + "METRICS = ['RootMeanSquaredError']" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hgoDc7Hilfc4", + "colab_type": "text" + }, + "source": [ + "# Imagery\n", + "\n", + "Gather and setup the imagery to use for inputs (predictors). This is a three-year, cloud-free, Landsat 8 composite. Display it in the notebook for a sanity check." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-IlgXu-vcUEY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Use Landsat 8 surface reflectance data.\n", + "l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\n", + "\n", + "# Cloud masking function.\n", + "def maskL8sr(image):\n", + " cloudShadowBitMask = ee.Number(2).pow(3).int()\n", + " cloudsBitMask = ee.Number(2).pow(5).int()\n", + " qa = image.select('pixel_qa')\n", + " mask1 = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(\n", + " qa.bitwiseAnd(cloudsBitMask).eq(0))\n", + " mask2 = image.mask().reduce('min')\n", + " mask3 = image.select(opticalBands).gt(0).And(\n", + " image.select(opticalBands).lt(10000)).reduce('min')\n", + " mask = mask1.And(mask2).And(mask3)\n", + " return image.select(opticalBands).divide(10000).addBands(\n", + " image.select(thermalBands).divide(10).clamp(273.15, 373.15)\n", + " .subtract(273.15).divide(100)).updateMask(mask)\n", + "\n", + "# The image input data is a cloud-masked median composite.\n", + "image = l8sr.filterDate('2015-01-01', '2017-12-31').map(maskL8sr).median()\n", + "\n", + "# Use folium to visualize the imagery.\n", + "mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n", + "map = folium.Map(location=[38., -122.5])\n", + "folium.TileLayer(\n", + " tiles=mapid['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='median composite',\n", + " ).add_to(map)\n", + "\n", + "mapid = image.getMapId({'bands': ['B10'], 'min': 0, 'max': 0.5})\n", + "folium.TileLayer(\n", + " tiles=mapid['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='thermal',\n", + " ).add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gHznnctkJsZJ", + "colab_type": "text" + }, + "source": [ + "Prepare the 
response (what we want to predict). This is impervious surface area (in fraction of a pixel) from the 2016 NLCD dataset. Display to check." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "e0wHDyxVirec", + "colab_type": "code", + "colab": {} + }, + "source": [ + "nlcd = ee.Image('USGS/NLCD/NLCD2016').select('impervious')\n", + "nlcd = nlcd.divide(100).float()\n", + "\n", + "mapid = nlcd.getMapId({'min': 0, 'max': 1})\n", + "map = folium.Map(location=[38., -122.5])\n", + "folium.TileLayer(\n", + " tiles=mapid['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='nlcd impervious',\n", + " ).add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CTS7_ZzPDhhg", + "colab_type": "text" + }, + "source": [ + "Stack the 2D images (Landsat composite and NLCD impervious surface) to create a single image from which samples can be taken. Convert the image into an array image in which each pixel stores 256x256 patches of pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eGHYsdAOipa4", + "colab_type": "code", + "colab": {} + }, + "source": [ + "featureStack = ee.Image.cat([\n", + " image.select(BANDS),\n", + " nlcd.select(RESPONSE)\n", + "]).float()\n", + "\n", + "list = ee.List.repeat(1, KERNEL_SIZE)\n", + "lists = ee.List.repeat(list, KERNEL_SIZE)\n", + "kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n", + "\n", + "arrays = featureStack.neighborhoodToArray(kernel)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F4djSxBRG2el", + "colab_type": "text" + }, + "source": [ + "Use some pre-made geometries to sample the stack in strategic locations. Specifically, these are hand-made polygons in which to take the 256x256 samples. Display the sampling polygons on a map, red for training polygons, blue for evaluation." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ure_WaD0itQY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "trainingPolys = ee.FeatureCollection('projects/google/DemoTrainingGeometries')\n", + "evalPolys = ee.FeatureCollection('projects/google/DemoEvalGeometries')\n", + "\n", + "polyImage = ee.Image(0).byte().paint(trainingPolys, 1).paint(evalPolys, 2)\n", + "polyImage = polyImage.updateMask(polyImage)\n", + "\n", + "mapid = polyImage.getMapId({'min': 1, 'max': 2, 'palette': ['red', 'blue']})\n", + "map = folium.Map(location=[38., -100.], zoom_start=5)\n", + "folium.TileLayer(\n", + " tiles=mapid['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='training polygons',\n", + " ).add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZV890gPHeZqz", + "colab_type": "text" + }, + "source": [ + "# Sampling\n", + "\n", + "The mapped data look reasonable so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FyRpvwENxE-A", + "colab_type": "code", + "cellView": "both", + "colab": {} + }, + "source": [ + "# Convert the feature collections to lists for iteration.\n", + "trainingPolysList = trainingPolys.toList(trainingPolys.size())\n", + "evalPolysList = evalPolys.toList(evalPolys.size())\n", + "\n", + "# These numbers determined experimentally.\n", + "n = 200 # Number of shards in each polygon.\n", + "N = 2000 # Total sample size in each polygon.\n", + "\n", + "# Export all the training data (in many pieces), with one task \n", + "# per geometry.\n", + "for g in range(trainingPolys.size().getInfo()):\n", + " geomSample = ee.FeatureCollection([])\n", + " for i in range(n):\n", + " sample = arrays.sample(\n", + " region = ee.Feature(trainingPolysList.get(g)).geometry(), \n", + " scale = 30,\n", + " numPixels = N / n, # Size of the shard.\n", + " seed = i,\n", + " tileScale = 8\n", + " )\n", + " geomSample = geomSample.merge(sample)\n", + "\n", + " desc = TRAINING_BASE + '_g' + str(g)\n", + " task = ee.batch.Export.table.toCloudStorage(\n", + " collection = geomSample,\n", + " description = desc,\n", + " bucket = BUCKET,\n", + " fileNamePrefix = FOLDER + '/' + desc,\n", + " fileFormat = 'TFRecord',\n", + " selectors = BANDS + [RESPONSE]\n", + " )\n", + " task.start()\n", + "\n", + "# Export all the evaluation data.\n", + "for g in range(evalPolys.size().getInfo()):\n", + " geomSample = ee.FeatureCollection([])\n", + " for i in range(n):\n", + " sample = arrays.sample(\n", + " region = ee.Feature(evalPolysList.get(g)).geometry(), \n", + " scale = 30,\n", + " numPixels = N / n,\n", + " seed = i,\n", + " tileScale = 8\n", + " )\n", + " geomSample = geomSample.merge(sample)\n", + "\n", + " desc = EVAL_BASE + '_g' + str(g)\n", + " task = ee.batch.Export.table.toCloudStorage(\n", + " collection = geomSample,\n", + " description = desc,\n", + " bucket = BUCKET,\n", + " fileNamePrefix = FOLDER + '/' + desc,\n", + " fileFormat = 'TFRecord',\n", + " selectors = BANDS + [RESPONSE]\n", + " )\n", + " task.start()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWXrvBE4607G", + "colab_type": "text" + }, + "source": [ + "# Training data\n", + "\n", + "Load the data exported from Earth Engine into a `tf.data.Dataset`. The following are helper functions for that." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WWZ0UXCVMyJP", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def parse_tfrecord(example_proto):\n", + " \"\"\"The parsing function.\n", + " Read a serialized example into the structure defined by FEATURES_DICT.\n", + " Args:\n", + " example_proto: a serialized Example.\n", + " Returns:\n", + " A dictionary of tensors, keyed by feature name.\n", + " \"\"\"\n", + " return tf.io.parse_single_example(example_proto, FEATURES_DICT)\n", + "\n", + "\n", + "def to_tuple(inputs):\n", + " \"\"\"Function to convert a dictionary of tensors to a tuple of (inputs, outputs).\n", + " Turn the tensors returned by parse_tfrecord into a stack in HWC shape.\n", + " Args:\n", + " inputs: A dictionary of tensors, keyed by feature name.\n", + " Returns:\n", + " A tuple of (inputs, outputs).\n", + " \"\"\"\n", + " inputsList = [inputs.get(key) for key in FEATURES]\n", + " stacked = tf.stack(inputsList, axis=0)\n", + " # Convert from CHW to HWC\n", + " stacked = tf.transpose(stacked, [1, 2, 0])\n", + " return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]\n", + "\n", + "\n", + "def get_dataset(pattern):\n", + " \"\"\"Function to read, parse and format to tuple a set of input tfrecord files.\n", + " Get all the files matching the pattern, parse and convert to tuple.\n", + " Args:\n", + " pattern: A file pattern to match in a Cloud Storage bucket.\n", + " Returns:\n", + " A tf.data.Dataset\n", + " \"\"\"\n", + " glob = tf.io.gfile.glob(pattern)\n", + " dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')\n", + " dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)\n", + " dataset = dataset.map(to_tuple, num_parallel_calls=5)\n", + " return dataset" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xg1fa18336D2", + "colab_type": "text" + }, + "source": [ + "Use the helpers to read in the training dataset. Print the first record to check." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rm0qRF0fAYcC", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def get_training_dataset():\n", + "\t\"\"\"Get the preprocessed training dataset\n", + " Returns: \n", + " A tf.data.Dataset of training data.\n", + " \"\"\"\n", + "\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '*'\n", + "\tdataset = get_dataset(glob)\n", + "\tdataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()\n", + "\treturn dataset\n", + "\n", + "training = get_training_dataset()\n", + "\n", + "print(iter(training.take(1)).next())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j-cQO5RL6vob", + "colab_type": "text" + }, + "source": [ + "# Evaluation data\n", + "\n", + "Now do the same thing to get an evaluation dataset. Note that unlike the training dataset, the evaluation dataset has a batch size of 1, is not repeated and is not shuffled." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ieKTCGiJ6xzo", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def get_eval_dataset():\n", + "\t\"\"\"Get the preprocessed evaluation dataset\n", + " Returns: \n", + " A tf.data.Dataset of evaluation data.\n", + " \"\"\"\n", + "\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + EVAL_BASE + '*'\n", + "\tdataset = get_dataset(glob)\n", + "\tdataset = dataset.batch(1).repeat()\n", + "\treturn dataset\n", + "\n", + "evaluation = get_eval_dataset()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9JIE7Yl87lgU", + "colab_type": "text" + }, + "source": [ + "# Model\n", + "\n", + "Here we use the Keras implementation of the U-Net model. The U-Net model takes 256x256 pixel patches as input and outputs per-pixel class probability, label or a continuous output. We can implement the model essentially unmodified, but will use mean squared error loss on the sigmoidal output since we are treating this as a regression problem, rather than a classification problem. Since impervious surface fraction is constrained to [0,1], with many values close to zero or one, a saturating activation function is suitable here." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wsnnnz56yS3l", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from tensorflow.python.keras import layers\n", + "from tensorflow.python.keras import losses\n", + "from tensorflow.python.keras import models\n", + "from tensorflow.python.keras import metrics\n", + "from tensorflow.python.keras import optimizers\n", + "\n", + "def conv_block(input_tensor, num_filters):\n", + "\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)\n", + "\tencoder = layers.BatchNormalization()(encoder)\n", + "\tencoder = layers.Activation('relu')(encoder)\n", + "\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)\n", + "\tencoder = layers.BatchNormalization()(encoder)\n", + "\tencoder = layers.Activation('relu')(encoder)\n", + "\treturn encoder\n", + "\n", + "def encoder_block(input_tensor, num_filters):\n", + "\tencoder = conv_block(input_tensor, num_filters)\n", + "\tencoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)\n", + "\treturn encoder_pool, encoder\n", + "\n", + "def decoder_block(input_tensor, concat_tensor, num_filters):\n", + "\tdecoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)\n", + "\tdecoder = layers.concatenate([concat_tensor, decoder], axis=-1)\n", + "\tdecoder = layers.BatchNormalization()(decoder)\n", + "\tdecoder = layers.Activation('relu')(decoder)\n", + "\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n", + "\tdecoder = layers.BatchNormalization()(decoder)\n", + "\tdecoder = layers.Activation('relu')(decoder)\n", + "\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n", + "\tdecoder = layers.BatchNormalization()(decoder)\n", + "\tdecoder = layers.Activation('relu')(decoder)\n", + "\treturn decoder\n", + "\n", + "def get_model():\n", + "\tinputs = layers.Input(shape=[None, None, len(BANDS)]) # 256\n", + "\tencoder0_pool, encoder0 = encoder_block(inputs, 32) # 128\n", + "\tencoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64\n", + "\tencoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32\n", + "\tencoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16\n", + "\tencoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 
8\n", + "\tcenter = conv_block(encoder4_pool, 1024) # center\n", + "\tdecoder4 = decoder_block(center, encoder4, 512) # 16\n", + "\tdecoder3 = decoder_block(decoder4, encoder3, 256) # 32\n", + "\tdecoder2 = decoder_block(decoder3, encoder2, 128) # 64\n", + "\tdecoder1 = decoder_block(decoder2, encoder1, 64) # 128\n", + "\tdecoder0 = decoder_block(decoder1, encoder0, 32) # 256\n", + "\toutputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)\n", + "\n", + "\tmodel = models.Model(inputs=[inputs], outputs=[outputs])\n", + "\n", + "\tmodel.compile(\n", + "\t\toptimizer=optimizers.get(OPTIMIZER), \n", + "\t\tloss=losses.get(LOSS),\n", + "\t\tmetrics=[metrics.get(metric) for metric in METRICS])\n", + "\n", + "\treturn model" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uu_E7OTDBCoS", + "colab_type": "text" + }, + "source": [ + "# Training the model\n", + "\n", + "You train a Keras model by calling `.fit()` on it. Here we're going to train for 10 epochs, which is suitable for demonstration purposes. For production use, you probably want to optimize this parameter, for example through [hyperparamter tuning](https://cloud.google.com/ml-engine/docs/tensorflow/using-hyperparameter-tuning)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NzzaWxOhSxBy", + "colab_type": "code", + "colab": {} + }, + "source": [ + "m = get_model()\n", + "\n", + "m.fit(\n", + " x=training, \n", + " epochs=EPOCHS, \n", + " steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n", + " validation_data=evaluation,\n", + " validation_steps=EVAL_SIZE)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U2XrwZHp66j4", + "colab_type": "text" + }, + "source": [ + "Note that the notebook VM is sometimes not heavy-duty enough to get through a whole training job, especially if you have a large buffer size or a large number of epochs. You can still use this notebook for training, but may need to set up an alternative VM ([learn more](https://research.google.com/colaboratory/local-runtimes.html)) for production use. Alternatively, you can package your code for running large training jobs on Google's AI Platform [as described here](https://cloud.google.com/ml-engine/docs/tensorflow/trainer-considerations). The following code loads a pre-trained model, which you can use for predictions right away." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-RJpNfEUS1qp", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Load a trained model. 50 epochs. 25 hours. Final RMSE ~0.08.\n", + "MODEL_DIR = 'gs://ee-docs-demos/fcnn-demo/trainer/model'\n", + "m = tf.keras.models.load_model(MODEL_DIR)\n", + "m.summary()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J1ySNup0xCqN", + "colab_type": "text" + }, + "source": [ + "# Prediction\n", + "\n", + "The prediction pipeline is:\n", + "\n", + "1. Export imagery on which to do predictions from Earth Engine in TFRecord format to a Cloud Storage bucket.\n", + "2. Use the trained model to make the predictions.\n", + "3. Write the predictions to a TFRecord file in a Cloud Storage.\n", + "4. Upload the predictions TFRecord file to Earth Engine.\n", + "\n", + "The following functions handle this process. It's useful to separate the export from the predictions so that you can experiment with different models without running the export every time." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "M3WDAa-RUpXP", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def doExport(out_image_base, kernel_buffer, region):\n", + " \"\"\"Run the image export task. Block until complete.\n", + " \"\"\"\n", + " task = ee.batch.Export.image.toCloudStorage(\n", + " image = image.select(BANDS),\n", + " description = out_image_base,\n", + " bucket = BUCKET,\n", + " fileNamePrefix = FOLDER + '/' + out_image_base,\n", + " region = region.getInfo()['coordinates'],\n", + " scale = 30,\n", + " fileFormat = 'TFRecord',\n", + " maxPixels = 1e10,\n", + " formatOptions = {\n", + " 'patchDimensions': KERNEL_SHAPE,\n", + " 'kernelSize': kernel_buffer,\n", + " 'compressed': True,\n", + " 'maxFileSize': 104857600\n", + " }\n", + " )\n", + " task.start()\n", + "\n", + " # Block until the task completes.\n", + " print('Running image export to Cloud Storage...')\n", + " import time\n", + " while task.active():\n", + " time.sleep(30)\n", + "\n", + " # Error condition\n", + " if task.status()['state'] != 'COMPLETED':\n", + " print('Error with image export.')\n", + " else:\n", + " print('Image export completed.')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zb_9_FflygVw", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def doPrediction(out_image_base, user_folder, kernel_buffer, region):\n", + " \"\"\"Perform inference on exported imagery, upload to Earth Engine.\n", + " \"\"\"\n", + "\n", + " print('Looking for TFRecord files...')\n", + "\n", + " # Get a list of all the files in the output bucket.\n", + " filesList = !gsutil ls 'gs://'{BUCKET}'/'{FOLDER}\n", + "\n", + " # Get only the files generated by the image export.\n", + " exportFilesList = [s for s in filesList if out_image_base in s]\n", + "\n", + " # Get the list of image files and the JSON mixer file.\n", + " imageFilesList = []\n", + " jsonFile = None\n", + " for f in exportFilesList:\n", + " if f.endswith('.tfrecord.gz'):\n", + " imageFilesList.append(f)\n", + " elif f.endswith('.json'):\n", + " jsonFile = f\n", + "\n", + " # Make sure the files are in the right order.\n", + " imageFilesList.sort()\n", + "\n", + " from pprint import pprint\n", + " pprint(imageFilesList)\n", + " print(jsonFile)\n", + "\n", + " import json\n", + " # Load the contents of the mixer file to a JSON object.\n", + " jsonText = !gsutil cat {jsonFile}\n", + " # Get a single string w/ newlines from the IPython.utils.text.SList\n", + " mixer = json.loads(jsonText.nlstr)\n", + " pprint(mixer)\n", + " patches = mixer['totalPatches']\n", + "\n", + " # Get set up for prediction.\n", + " x_buffer = int(kernel_buffer[0] / 2)\n", + " y_buffer = int(kernel_buffer[1] / 2)\n", + "\n", + " buffered_shape = [\n", + " KERNEL_SHAPE[0] + kernel_buffer[0],\n", + " KERNEL_SHAPE[1] + kernel_buffer[1]]\n", + "\n", + " imageColumns = [\n", + " tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32) \n", + " for k in BANDS\n", + " ]\n", + "\n", + " imageFeaturesDict = dict(zip(BANDS, imageColumns))\n", + "\n", + " def parse_image(example_proto):\n", + " return tf.io.parse_single_example(example_proto, imageFeaturesDict)\n", + "\n", + " def toTupleImage(inputs):\n", + " inputsList = [inputs.get(key) for key in BANDS]\n", + " stacked = tf.stack(inputsList, axis=0)\n", + " stacked = tf.transpose(stacked, [1, 2, 0])\n", + " return stacked\n", + "\n", + " # Create a dataset from the TFRecord file(s) in Cloud Storage.\n", + " imageDataset = 
tf.data.TFRecordDataset(imageFilesList, compression_type='GZIP')\n", + " imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)\n", + " imageDataset = imageDataset.map(toTupleImage).batch(1)\n", + "\n", + " # Perform inference.\n", + " print('Running predictions...')\n", + " predictions = m.predict(imageDataset, steps=patches, verbose=1)\n", + " # print(predictions[0])\n", + "\n", + " print('Writing predictions...')\n", + " out_image_file = 'gs://' + BUCKET + '/' + FOLDER + '/' + out_image_base + '.TFRecord'\n", + " writer = tf.io.TFRecordWriter(out_image_file)\n", + " patches = 0\n", + " for predictionPatch in predictions:\n", + " print('Writing patch ' + str(patches) + '...')\n", + " predictionPatch = predictionPatch[\n", + " x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]\n", + "\n", + " # Create an example.\n", + " example = tf.train.Example(\n", + " features=tf.train.Features(\n", + " feature={\n", + " 'impervious': tf.train.Feature(\n", + " float_list=tf.train.FloatList(\n", + " value=predictionPatch.flatten()))\n", + " }\n", + " )\n", + " )\n", + " # Write the example.\n", + " writer.write(example.SerializeToString())\n", + " patches += 1\n", + "\n", + " writer.close()\n", + "\n", + " # Start the upload.\n", + " out_image_asset = user_folder + '/' + out_image_base\n", + " !earthengine upload image --asset_id={out_image_asset} {out_image_file} {jsonFile}" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LZqlymOehnQO", + "colab_type": "text" + }, + "source": [ + "Now there's all the code needed to run the prediction pipeline, all that remains is to specify the output region in which to do the prediction, the names of the output files, where to put them, and the shape of the outputs. In terms of the shape, the model is trained on 256x256 patches, but can work (in theory) on any patch that's big enough with even dimensions ([reference](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)). Because of tile boundary artifacts, give the model slightly larger patches for prediction, then clip out the middle 256x256 patch. This is controlled with a kernel buffer, half the size of which will extend beyond the kernel buffer. For example, specifying a 128x128 kernel will append 64 pixels on each side of the patch, to ensure that the pixels in the output are taken from inputs completely covered by the kernel." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FPANwc7B1-TS", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Output assets folder: YOUR FOLDER\n", + "user_folder = 'users/username' # INSERT YOUR FOLDER HERE.\n", + "\n", + "# Base file name to use for TFRecord files and assets.\n", + "bj_image_base = 'FCNN_demo_beijing_384_'\n", + "# Half this will extend on the sides of each patch.\n", + "bj_kernel_buffer = [128, 128]\n", + "# Beijing\n", + "bj_region = ee.Geometry.Polygon(\n", + " [[[115.9662455210937, 40.121362012835235],\n", + " [115.9662455210937, 39.64293313749715],\n", + " [117.01818643906245, 39.64293313749715],\n", + " [117.01818643906245, 40.121362012835235]]], None, False)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "lLNEOLkXWvSi", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Run the export.\n", + "doExport(bj_image_base, bj_kernel_buffer, bj_region)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "KxACnxKFrQ_J", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Run the prediction.\n", + "doPrediction(bj_image_base, user_folder, bj_kernel_buffer, bj_region)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uj_G9OZ1xH6K", + "colab_type": "text" + }, + "source": [ + "# Display the output\n", + "\n", + "One the data has been exported, the model has made predictions and the predictions have been written to a file, and the image imported to Earth Engine, it's possible to display the resultant Earth Engine asset. Here, display the impervious area predictions over Beijing, China." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Jgco6HJ4R5p2", + "colab_type": "code", + "colab": {} + }, + "source": [ + "out_image = ee.Image(user_folder + '/' + bj_image_base)\n", + "mapid = out_image.getMapId({'min': 0, 'max': 1})\n", + "map = folium.Map(location=[39.898, 116.5097])\n", + "folium.TileLayer(\n", + " tiles=mapid['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='predicted impervious',\n", + " ).add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + } + ] +} diff --git a/guides/linked/Uploading_image_tiles_as_a_single_asset_using_a_manifest.ipynb b/guides/linked/Uploading_image_tiles_as_a_single_asset_using_a_manifest.ipynb index e6fdf7868..f3dd10da5 100644 --- a/guides/linked/Uploading_image_tiles_as_a_single_asset_using_a_manifest.ipynb +++ b/guides/linked/Uploading_image_tiles_as_a_single_asset_using_a_manifest.ipynb @@ -1 +1,351 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Uploading_image_tiles_as_a_single_asset_using_a_manifest.ipynb","provenance":[{"file_id":"1nblLe678Tucbe0Iatdfo0fuztBDxedfp","timestamp":1588787451968}],"private_outputs":true,"collapsed_sections":[],"authorship_tag":"ABX9TyOq42l9DdvNKTF0Ej9L/AS6"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"fSIfBsgi8dNK","colab_type":"code","colab":{}},"source":["#@title Copyright 2020 Google LLC. 
{ display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"aV1xZ1CPi3Nw","colab_type":"text"},"source":["
\n","\n"," Run in Google Colab\n","\n"," View source on GitHub
"]},{"cell_type":"markdown","metadata":{"id":"RPBL-XjRFNop","colab_type":"text"},"source":["# Uploading an image from tiles using a manifest\n","\n","This notebook demonstrates uploading a set of image tiles into a single asset using a manifest file. See [this doc](https://developers.google.com/earth-engine/image_manifest) for more details about manifest upload using the Earth Engine command line tool.\n","\n","10-meter land cover images derived from Sentinel-2 ([reference](https://doi.org/10.1016/j.scib.2019.03.002)) from the [Finer Resolution Global Land Cover Mapping (FROM-GLC) website](http://data.ess.tsinghua.edu.cn/) are downloaded directly to a Cloud Storage bucket and uploaded to a single Earth Engine asset from there. A manifest file, described below, is used to configure the upload."]},{"cell_type":"markdown","metadata":{"id":"K57gwmayH24H","colab_type":"text"},"source":["First, authenticate with Google Cloud, so you can access Cloud Storage buckets."]},{"cell_type":"code","metadata":{"id":"a0WqP4vKIM5v","colab_type":"code","colab":{}},"source":["from google.colab import auth\n","auth.authenticate_user()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"1tPSX8ABIB36","colab_type":"text"},"source":["## Download to Cloud Storage\n","\n","Paths from [the provider website](http://data.ess.tsinghua.edu.cn/fromglc10_2017v01.html) are manually copied to a list object as demonstrated below. Download directly to a Cloud Storage bucket to which you can write."]},{"cell_type":"code","metadata":{"id":"TQGLIdH6IQmn","colab_type":"code","colab":{}},"source":["# URLs of a few tiles.\n","urls = [\n"," 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_36_-120.tif',\n"," 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_36_-122.tif',\n"," 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_36_-124.tif',\n"," 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_38_-120.tif',\n"," 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_38_-122.tif',\n"," 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_38_-124.tif'\n","]\n","\n","# You need to have write access to this bucket.\n","bucket = 'your-bucket-folder'\n","\n","# Pipe curl output to gsutil.\n","for f in urls:\n"," filepath = bucket + '/' + f.split('/')[-1]\n"," !curl {f} | gsutil cp - {filepath}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"nIOsWbLf66F-","colab_type":"text"},"source":["## Build the manifest file\n","\n","Build the manifest file from a dictionary. Turn the dictionary into JSON. Note the use of the `gsutil` tool to get a listing of files in a Cloud Storage bucket ([learn more about `gsutil`](https://cloud.google.com/storage/docs/gsutil)). Also note that the structure of the manifest is described in detail [here](https://developers.google.com/earth-engine/image_manifest#manifest-structure-reference). Because the data are categorical, a `MODE` pyramiding policy is specified. 
Learn more about how Earth Engine builds image pyramids [here](https://developers.google.com/earth-engine/scale)."]},{"cell_type":"code","metadata":{"id":"DPddpXYrJlap","colab_type":"code","colab":{}},"source":["# List the contents of the cloud folder.\n","cloud_files = !gsutil ls {bucket + '/*.tif'}\n","\n","# Get the list of source URIs from the gsutil output.\n","sources_uris = [{'uris': [f]} for f in cloud_files]\n","\n","asset_name = 'path/to/your/asset'\n","\n","# The enclosing object for the asset.\n","asset = {\n"," 'name': asset_name,\n"," 'tilesets': [\n"," {\n"," 'sources': sources_uris\n"," }\n"," ],\n"," 'bands': [\n"," {\n"," 'id': 'cover_code',\n"," 'pyramiding_policy': 'MODE',\n"," 'missing_data': {\n"," 'values': [0]\n"," }\n"," }\n"," ]\n","}\n","\n","import json\n","print(json.dumps(asset, indent=2))"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"D2j6_TbCUiwZ","colab_type":"text"},"source":["Inspect the printed JSON for errors. If the JSON is acceptable, write it to a file and ensure that the file matches the printed JSON."]},{"cell_type":"code","metadata":{"id":"frZyXUDnFHVv","colab_type":"code","colab":{}},"source":["file_name = 'gaia_manifest.json'\n","\n","with open(file_name, 'w') as f:\n"," json.dump(asset, f, indent=2)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"k9WBqTW6XAwn","colab_type":"text"},"source":["Inspect the written file for errors."]},{"cell_type":"code","metadata":{"id":"wjunR9SLWn2A","colab_type":"code","colab":{}},"source":["!cat {file_name}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4MWm6WWbXG9G","colab_type":"text"},"source":["## Upload to Earth Engine\n","\n","If you are able to `cat` the written file, run the upload to Earth Engine. 
First, import the Earth Engine library, authenticate and initialize."]},{"cell_type":"code","metadata":{"id":"hLFVQeDPXPE0","colab_type":"code","colab":{}},"source":["import ee\n","ee.Authenticate()\n","ee.Initialize()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"A3ztutjFYqmt","colab_type":"code","colab":{}},"source":["# Do the upload.\n","!earthengine upload image --manifest {file_name}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"vELn42MrZxwY","colab_type":"text"},"source":["## Visualize the uploaded image with folium\n","\n","This is what [FROM-GLC](http://data.ess.tsinghua.edu.cn/) says about the classification system:\n","\n","| Class | Code |\n","| ------------- | ------------- |\n","| Cropland | 10 |\n","| Forest | 20 |\n","| Grassland | 30 |\n","| Shrubland | 40 |\n","| Wetland | 50 |\n","| Water | 60 |\n","| Tundra | 70 |\n","| Impervious | 80 |\n","| Bareland | 90 |\n","| Snow/Ice | 100 |\n","\n","Use a modified FROM-GLC palette to visualize the results."]},{"cell_type":"code","metadata":{"id":"mKQOEbkvPAS0","colab_type":"code","colab":{}},"source":["palette = [\n"," 'a3ff73', # farmland\n"," '267300', # forest\n"," 'ffff00', # grassland\n"," '70a800', # shrub\n"," '00ffff', # wetland\n"," '005cff', # water\n"," '004600', # tundra\n"," 'c500ff', # impervious\n"," 'ffaa00', # bare\n"," 'd1d1d1', # snow, ice\n","]\n","vis = {'min': 10, 'max': 100, 'palette': palette}\n","\n","ingested_image = ee.Image('projects/ee-nclinton/assets/fromglc10_demo')\n","map_id = ingested_image.getMapId(vis)\n","\n","import folium\n","\n","map = folium.Map(location=[37.6413, -122.2582])\n","folium.TileLayer(\n"," tiles=map_id['tile_fetcher'].url_format,\n"," attr='Map Data © Google Earth Engine',\n"," overlay=True,\n"," name='fromglc10_demo',\n",").add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]}]} \ No newline at end of file +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Uploading_image_tiles_as_a_single_asset_using_a_manifest.ipynb", + "provenance": [ + { + "file_id": "1nblLe678Tucbe0Iatdfo0fuztBDxedfp", + "timestamp": 1588787451968 + } + ], + "private_outputs": true, + "collapsed_sections": [], + "authorship_tag": "ABX9TyOq42l9DdvNKTF0Ej9L/AS6" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "fSIfBsgi8dNK", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#@title Copyright 2020 Google LLC. { display-mode: \"form\" }\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aV1xZ1CPi3Nw", + "colab_type": "text" + }, + "source": [ + "\u003ctable class=\"ee-notebook-buttons\" align=\"left\"\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Manifest_image_upload_demo.ipynb\"\u003e\n", + " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e Run in Google Colab\u003c/a\u003e\n", + "\u003c/td\u003e\u003ctd\u003e\n", + "\u003ca target=\"_blank\" href=\"https://github.com/google/earthengine-community/blob/master/guides/linked/Manifest_image_upload_demo.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e View source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RPBL-XjRFNop", + "colab_type": "text" + }, + "source": [ + "# Uploading an image from tiles using a manifest\n", + "\n", + "This notebook demonstrates uploading a set of image tiles into a single asset using a manifest file. See [this doc](https://developers.google.com/earth-engine/image_manifest) for more details about manifest upload using the Earth Engine command line tool.\n", + "\n", + "10-meter land cover images derived from Sentinel-2 ([reference](https://doi.org/10.1016/j.scib.2019.03.002)) from the [Finer Resolution Global Land Cover Mapping (FROM-GLC) website](http://data.ess.tsinghua.edu.cn/) are downloaded directly to a Cloud Storage bucket and uploaded to a single Earth Engine asset from there. A manifest file, described below, is used to configure the upload." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K57gwmayH24H", + "colab_type": "text" + }, + "source": [ + "First, authenticate with Google Cloud, so you can access Cloud Storage buckets." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "a0WqP4vKIM5v", + "colab_type": "code", + "colab": {} + }, + "source": [ + "from google.colab import auth\n", + "auth.authenticate_user()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1tPSX8ABIB36", + "colab_type": "text" + }, + "source": [ + "## Download to Cloud Storage\n", + "\n", + "Paths from [the provider website](http://data.ess.tsinghua.edu.cn/fromglc10_2017v01.html) are manually copied to a list object as demonstrated below. Download directly to a Cloud Storage bucket to which you can write." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TQGLIdH6IQmn", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# URLs of a few tiles.\n", + "urls = [\n", + " 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_36_-120.tif',\n", + " 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_36_-122.tif',\n", + " 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_36_-124.tif',\n", + " 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_38_-120.tif',\n", + " 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_38_-122.tif',\n", + " 'http://data.ess.tsinghua.edu.cn/data/fromglc10_2017v01/fromglc10v01_38_-124.tif'\n", + "]\n", + "\n", + "# You need to have write access to this bucket.\n", + "bucket = 'your-bucket-folder'\n", + "\n", + "# Pipe curl output to gsutil.\n", + "for f in urls:\n", + " filepath = bucket + '/' + f.split('/')[-1]\n", + " !curl {f} | gsutil cp - {filepath}" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nIOsWbLf66F-", + "colab_type": "text" + }, + "source": [ + "## Build the manifest file\n", + "\n", + "Build the manifest file from a dictionary. Turn the dictionary into JSON. Note the use of the `gsutil` tool to get a listing of files in a Cloud Storage bucket ([learn more about `gsutil`](https://cloud.google.com/storage/docs/gsutil)). Also note that the structure of the manifest is described in detail [here](https://developers.google.com/earth-engine/image_manifest#manifest-structure-reference). Because the data are categorical, a `MODE` pyramiding policy is specified. Learn more about how Earth Engine builds image pyramids [here](https://developers.google.com/earth-engine/scale)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DPddpXYrJlap", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# List the contents of the cloud folder.\n", + "cloud_files = !gsutil ls {bucket + '/*.tif'}\n", + "\n", + "# Get the list of source URIs from the gsutil output.\n", + "sources_uris = [{'uris': [f]} for f in cloud_files]\n", + "\n", + "asset_name = 'path/to/your/asset'\n", + "\n", + "# The enclosing object for the asset.\n", + "asset = {\n", + " 'name': asset_name,\n", + " 'tilesets': [\n", + " {\n", + " 'sources': sources_uris\n", + " }\n", + " ],\n", + " 'bands': [\n", + " {\n", + " 'id': 'cover_code',\n", + " 'pyramiding_policy': 'MODE',\n", + " 'missing_data': {\n", + " 'values': [0]\n", + " }\n", + " }\n", + " ]\n", + "}\n", + "\n", + "import json\n", + "print(json.dumps(asset, indent=2))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D2j6_TbCUiwZ", + "colab_type": "text" + }, + "source": [ + "Inspect the printed JSON for errors. If the JSON is acceptable, write it to a file and ensure that the file matches the printed JSON." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "frZyXUDnFHVv", + "colab_type": "code", + "colab": {} + }, + "source": [ + "file_name = 'gaia_manifest.json'\n", + "\n", + "with open(file_name, 'w') as f:\n", + " json.dump(asset, f, indent=2)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k9WBqTW6XAwn", + "colab_type": "text" + }, + "source": [ + "Inspect the written file for errors." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "wjunR9SLWn2A", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!cat {file_name}" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4MWm6WWbXG9G", + "colab_type": "text" + }, + "source": [ + "## Upload to Earth Engine\n", + "\n", + "If you are able to `cat` the written file, run the upload to Earth Engine. First, import the Earth Engine library, authenticate and initialize." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hLFVQeDPXPE0", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import ee\n", + "ee.Authenticate()\n", + "ee.Initialize(project='my-project')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "A3ztutjFYqmt", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Do the upload.\n", + "!earthengine upload image --manifest {file_name}" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vELn42MrZxwY", + "colab_type": "text" + }, + "source": [ + "## Visualize the uploaded image with folium\n", + "\n", + "This is what [FROM-GLC](http://data.ess.tsinghua.edu.cn/) says about the classification system:\n", + "\n", + "| Class | Code |\n", + "| ------------- | ------------- |\n", + "| Cropland | 10 |\n", + "| Forest | 20 |\n", + "| Grassland | 30 |\n", + "| Shrubland | 40 |\n", + "| Wetland | 50 |\n", + "| Water | 60 |\n", + "| Tundra | 70 |\n", + "| Impervious | 80 |\n", + "| Bareland | 90 |\n", + "| Snow/Ice | 100 |\n", + "\n", + "Use a modified FROM-GLC palette to visualize the results." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mKQOEbkvPAS0", + "colab_type": "code", + "colab": {} + }, + "source": [ + "palette = [\n", + " 'a3ff73', # farmland\n", + " '267300', # forest\n", + " 'ffff00', # grassland\n", + " '70a800', # shrub\n", + " '00ffff', # wetland\n", + " '005cff', # water\n", + " '004600', # tundra\n", + " 'c500ff', # impervious\n", + " 'ffaa00', # bare\n", + " 'd1d1d1', # snow, ice\n", + "]\n", + "vis = {'min': 10, 'max': 100, 'palette': palette}\n", + "\n", + "ingested_image = ee.Image('projects/ee-nclinton/assets/fromglc10_demo')\n", + "map_id = ingested_image.getMapId(vis)\n", + "\n", + "import folium\n", + "\n", + "map = folium.Map(location=[37.6413, -122.2582])\n", + "folium.TileLayer(\n", + " tiles=map_id['tile_fetcher'].url_format,\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " overlay=True,\n", + " name='fromglc10_demo',\n", + ").add_to(map)\n", + "map.add_child(folium.LayerControl())\n", + "map" + ], + "execution_count": 0, + "outputs": [] + } + ] +} diff --git a/guides/linked/ee-api-colab-setup.ipynb b/guides/linked/ee-api-colab-setup.ipynb index 22535248f..5c12eff3d 100644 --- a/guides/linked/ee-api-colab-setup.ipynb +++ b/guides/linked/ee-api-colab-setup.ipynb @@ -114,7 +114,7 @@ "ee.Authenticate()\n", "\n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] diff --git a/samples/python/guides/dependencies.py b/samples/python/guides/dependencies.py index bc3690fca..1088739a5 100644 --- a/samples/python/guides/dependencies.py +++ b/samples/python/guides/dependencies.py @@ -17,7 +17,7 @@ # [START earthengine__dependencies__ee_setup] import ee ee.Authenticate() -ee.Initialize() +ee.Initialize(project='my-project') # [END 
earthengine__dependencies__ee_setup] # [START earthengine__dependencies__pprint_import] diff --git a/tutorials/data-converters/index.ipynb b/tutorials/data-converters/index.ipynb index b22f39f59..cdb68a65a 100644 --- a/tutorials/data-converters/index.ipynb +++ b/tutorials/data-converters/index.ipynb @@ -93,7 +93,7 @@ "outputs": [], "source": [ "ee.Authenticate()\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ] }, { @@ -143,7 +143,7 @@ "source": [ "basins = ee.FeatureCollection(BASINS_ID)\n", "wa = ee.FeatureCollection(BOUNDARIES_ID).filter(\n", - " 'ADM0_NAME == \"United States of America\" && '\n", + " 'ADM0_NAME == \"United States of America\" \u0026\u0026 '\n", " 'ADM1_NAME == \"Washington\"'\n", ")\n", "\n", @@ -650,7 +650,7 @@ "high_order_wa_basins_df = ee.data.listFeatures({\n", " 'assetId': 'WWF/HydroSHEDS/v1/Basins/hybas_6',\n", " 'region': wa.geometry().getInfo(),\n", - " 'filter': 'ORDER >= 3',\n", + " 'filter': 'ORDER \u003e= 3',\n", " 'fileFormat': 'PANDAS_DATAFRAME'\n", "})\n", "\n", @@ -679,7 +679,7 @@ "high_order_wa_basins_gdf = ee.data.listFeatures({\n", " 'assetId': 'WWF/HydroSHEDS/v1/Basins/hybas_6',\n", " 'region': wa.geometry().getInfo(),\n", - " 'filter': 'ORDER >= 3',\n", + " 'filter': 'ORDER \u003e= 3',\n", " 'fileFormat': 'GEOPANDAS_GEODATAFRAME'\n", "})\n", "\n", @@ -783,7 +783,7 @@ "source": [ "jan_mean_temp_npy = jan_mean_temp_npy['tavg']\n", "\n", - "jan_mean_temp_npy = np.where(jan_mean_temp_npy < -9999, np.nan, jan_mean_temp_npy)\n", + "jan_mean_temp_npy = np.where(jan_mean_temp_npy \u003c -9999, np.nan, jan_mean_temp_npy)\n", "jan_mean_temp_npy = jan_mean_temp_npy * 0.1\n", "jan_mean_temp_npy" ], @@ -820,4 +820,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-1/index.ipynb b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-1/index.ipynb index 169f3996d..21dda632d 100644 --- a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-1/index.ipynb +++ b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-1/index.ipynb @@ -111,7 +111,7 @@ "$$\n", "\n", "More statistics will be introduced as needed.\n", - "A highly recommended reference is [Freund's Mathematical Statistics](https://www.amazon.de/John-Freunds-Mathematical-Statistics/dp/013123613X/ref=sr_1_8?__mk_de_DE=%C3%85M%C3%85%C5%BD%C3%95%C3%91&dchild=1&keywords=freund+mathematical+statistics&qid=1597837319&sr=8-8)." + "A highly recommended reference is [Freund's Mathematical Statistics](https://www.amazon.de/John-Freunds-Mathematical-Statistics/dp/013123613X/ref=sr_1_8?__mk_de_DE=%C3%85M%C3%85%C5%BD%C3%95%C3%91\u0026dchild=1\u0026keywords=freund+mathematical+statistics\u0026qid=1597837319\u0026sr=8-8)." 
] }, { @@ -148,7 +148,7 @@ "ee.Authenticate()\n", " \n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -208,7 +208,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", " folium.raster_layers.TileLayer(\n", " tiles = map_id_dict['tile_fetcher'].url_format,\n", - " attr = 'Map Data © Google Earth Engine',\n", + " attr = 'Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " name = name,\n", " overlay = True,\n", " control = True\n", @@ -571,10 +571,10 @@ "The Sentinel-1 platform is a dual polarimetric synthetic aperture radar system, emitting radar microwaves in the C-band with one polarization (vertical in most cases) and recording both vertical and horizontal reflected polarizations. This is represented mathematically as\n", "\n", "$$\n", - "\\pmatrix{E_v^b\\cr E_h^b} = {e^{-{\\bf i}rk}\\over r}\\pmatrix{S_{vv} & S_{vh}\\cr S_{hv} & S_{hh}}\\pmatrix{E_v^i\\cr 0}. \\tag{1.2}\n", + "\\pmatrix{E_v^b\\cr E_h^b} = {e^{-{\\bf i}rk}\\over r}\\pmatrix{S_{vv} \u0026 S_{vh}\\cr S_{hv} \u0026 S_{hh}}\\pmatrix{E_v^i\\cr 0}. \\tag{1.2}\n", "$$\n", "\n", - "The incident, vertically polarized radar signal $\\pmatrix{E_v^i\\cr 0}$ is transformed by a complex _scattering matrix_ $\\pmatrix{S_{vv} & S_{vh}\\cr S_{hv} & S_{hh}}$ into the backscattered signal $\\pmatrix{E_v^b\\cr E_h^b}$ having both vertical and horizontal polarization components. The exponent term accounts for the phase shift due to the return distance $r$ from target to sensor, where $k$ is the wave number, $k=2\\pi/\\lambda$. From measurement of the backscattered radiation at the sensor, two of the four complex scattering matrix elements can be derived and processed into two-dimensional (slant range $\\times$ azimuth) arrays, comprising the so-called _single look complex_ image. Written as a complex vector, the two derived elements are\n", + "The incident, vertically polarized radar signal $\\pmatrix{E_v^i\\cr 0}$ is transformed by a complex _scattering matrix_ $\\pmatrix{S_{vv} \u0026 S_{vh}\\cr S_{hv} \u0026 S_{hh}}$ into the backscattered signal $\\pmatrix{E_v^b\\cr E_h^b}$ having both vertical and horizontal polarization components. The exponent term accounts for the phase shift due to the return distance $r$ from target to sensor, where $k$ is the wave number, $k=2\\pi/\\lambda$. From measurement of the backscattered radiation at the sensor, two of the four complex scattering matrix elements can be derived and processed into two-dimensional (slant range $\\times$ azimuth) arrays, comprising the so-called _single look complex_ image. Written as a complex vector, the two derived elements are\n", " \n", "$$\n", "S = \\pmatrix{S_{vv}\\cr S_{vh}}. \\tag{1.3}\n", @@ -589,19 +589,19 @@ "and the outer product is the (dual pol) _covariance matrix image_\n", "\n", "$$\n", - "C2 = SS^\\dagger = \\pmatrix{S_{vv}\\cr S_{vh}}(S_{vv}^*\\ S_{vh}^*) = \\pmatrix{|S_{vv}|^2 & S_{vv}^*S_{vh} \\cr S_{vh}^*S_{vv} & |S_{vh}|^2}. \\tag{1.5}\n", + "C2 = SS^\\dagger = \\pmatrix{S_{vv}\\cr S_{vh}}(S_{vv}^*\\ S_{vh}^*) = \\pmatrix{|S_{vv}|^2 \u0026 S_{vv}^*S_{vh} \\cr S_{vh}^*S_{vv} \u0026 |S_{vh}|^2}. \\tag{1.5}\n", "$$\n", "\n", "The diagonal elements are real numbers, the off-diagonal elements are complex conjugates of each other and contain the relative phases of the $S_{vv}$ and $S_{vh}$ components. 
The off-diagonal elements are not available for S1 archived imagery in GEE, so that if we nevertheless choose to represent the data in covariance matrix form, the matrix is diagonal: \n", "\n", "$$\n", - "C2 = \\pmatrix{|S_{vv}|^2 & 0 \\cr 0 & |S_{vh}|^2}, \\tag{1.6a}\n", + "C2 = \\pmatrix{|S_{vv}|^2 \u0026 0 \\cr 0 \u0026 |S_{vh}|^2}, \\tag{1.6a}\n", "$$\n", "\n", "In terms of radar scattering cross sections (sigma nought),\n", "\n", "$$\n", - "C2 = {1\\over 4\\pi}\\pmatrix{\\sigma^o_{vv} & 0 \\cr 0 & \\sigma^o_{vh}}. \\tag{1.6b}\n", + "C2 = {1\\over 4\\pi}\\pmatrix{\\sigma^o_{vv} \u0026 0 \\cr 0 \u0026 \\sigma^o_{vh}}. \\tag{1.6b}\n", "$$\n" ] }, @@ -766,7 +766,7 @@ "The covariance representation of the dual pol multilook images is \n", "\n", "$$\n", - "C2 = \\pmatrix{\\langle|S_{vv}|^2\\rangle & 0 \\cr 0 & \\langle|S_{vh}|^2\\rangle}. \\tag{1.20}\n", + "C2 = \\pmatrix{\\langle|S_{vv}|^2\\rangle \u0026 0 \\cr 0 \u0026 \\langle|S_{vh}|^2\\rangle}. \\tag{1.20}\n", "$$\n", "\n", "\n", @@ -816,4 +816,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-2/index.ipynb b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-2/index.ipynb index ce2ea300f..6e040990a 100644 --- a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-2/index.ipynb +++ b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-2/index.ipynb @@ -1,1265 +1,1265 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8kdsGkYJXXKc" - }, - "outputs": [], - "source": [ - "#@title Copyright 2020 The Earth Engine Community Authors { display-mode: \"form\" }\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l18M9_r5XmAQ" - }, - "source": [ - "# Detecting Changes in Sentinel-1 Imagery (Part 2)\n", - "Author: mortcanty\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U7i55vr_aKCB" - }, - "source": [ - "### Run me first\n", - "\n", - "Run the following cell to initialize the API. The output will contain instructions on how to grant this notebook access to Earth Engine using your account." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XeFsiSp2aDL6" - }, - "outputs": [], - "source": [ - "import ee\n", - "\n", - "# Trigger the authentication flow.\n", - "ee.Authenticate()\n", - "\n", - "# Initialize the library.\n", - "ee.Initialize()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VOf_UnIcZKBJ" - }, - "source": [ - "### Datasets and Python modules\n", - "One [dataset](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S1_GRD) will be used in the tutorial:\n", - "\n", - "- COPERNICUS/S1_GRD_FLOAT\n", - " - Sentinel-1 ground range detected images\n", - "\n", - "The following cell imports some python modules which we will be using as we go along and enables inline graphics." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JR0cxCpeIxoY" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from scipy.stats import norm, gamma, f, chi2\n", - "import IPython.display as disp\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eelxHh2qc6xg" - }, - "source": [ - "And to make use of interactive graphics, we import the _folium_ package:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VIiyf6azf4mU" - }, - "outputs": [], - "source": [ - "# Import the Folium library.\n", - "import folium\n", - "\n", - "# Define a method for displaying Earth Engine image tiles to folium map.\n", - "def add_ee_layer(self, ee_image_object, vis_params, name):\n", - " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", - " folium.raster_layers.TileLayer(\n", - " tiles = map_id_dict['tile_fetcher'].url_format,\n", - " attr = 'Map Data © Google Earth Engine',\n", - " name = name,\n", - " overlay = True,\n", - " control = True\n", - " ).add_to(self)\n", - "\n", - "# Add EE drawing method to folium.\n", - "folium.Map.add_ee_layer = add_ee_layer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AfDTVBnvu5un" - }, - "source": [ - "## Part 2. Hypothesis testing" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r-GuxE6lJHHx" - }, - "source": [ - "We continue from [Part 1](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1) of the Tutorial with the area of interest _aoi_ covering the Frankfurt International Airport and a subset _aoi\\_sub_ consisting of uniform pixels within a forested region." 
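As a side note (a sketch added here, not taken from the patched notebook): the _aoi_ built from geoJSON in the next cell is an axis-aligned rectangle, so it could equally well be constructed directly from its corner coordinates:

```python
import ee

# Equivalent rectangular aoi, using the same corner coordinates as the
# geoJSON polygon defined below (xMin, yMin, xMax, yMax).
aoi = ee.Geometry.Rectangle([
    8.473892211914062, 49.98081240937428,
    8.658599853515625, 50.06066538593667])
```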
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hww9JAK0JgFm" - }, - "outputs": [], - "source": [ - "geoJSON = {\n", - " \"type\": \"FeatureCollection\",\n", - " \"features\": [\n", - " {\n", - " \"type\": \"Feature\",\n", - " \"properties\": {},\n", - " \"geometry\": {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [\n", - " 8.473892211914062,\n", - " 49.98081240937428\n", - " ],\n", - " [\n", - " 8.658599853515625,\n", - " 49.98081240937428\n", - " ],\n", - " [\n", - " 8.658599853515625,\n", - " 50.06066538593667\n", - " ],\n", - " [\n", - " 8.473892211914062,\n", - " 50.06066538593667\n", - " ],\n", - " [\n", - " 8.473892211914062,\n", - " 49.98081240937428\n", - " ]\n", - " ]\n", - " ]\n", - " }\n", - " }\n", - " ]\n", - "}\n", - "coords = geoJSON['features'][0]['geometry']['coordinates']\n", - "aoi = ee.Geometry.Polygon(coords)\n", - "geoJSON = {\n", - " \"type\": \"FeatureCollection\",\n", - " \"features\": [\n", - " {\n", - " \"type\": \"Feature\",\n", - " \"properties\": {},\n", - " \"geometry\": {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [\n", - " 8.534317016601562,\n", - " 50.021637833966786\n", - " ],\n", - " [\n", - " 8.530540466308594,\n", - " 49.99780882512238\n", - " ],\n", - " [\n", - " 8.564186096191406,\n", - " 50.00663576154257\n", - " ],\n", - " [\n", - " 8.578605651855469,\n", - " 50.019431940583104\n", - " ],\n", - " [\n", - " 8.534317016601562,\n", - " 50.021637833966786\n", - " ]\n", - " ]\n", - " ]\n", - " }\n", - " }\n", - " ]\n", - "}\n", - "coords = geoJSON['features'][0]['geometry']['coordinates']\n", - "aoi_sub = ee.Geometry.Polygon(coords)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C6rM63_lTqJ9" - }, - "source": [ - "This time we filter the S1 archive to get an image collection consisting of two images acquired in the month of August, 2020. Because we are interested in change detection, it is essential that the local incidence angles be the same in both images. 
So now we specify both the orbit pass (ASCENDING) as well the relative orbit number (15):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ALF5YkahTvRl" - }, - "outputs": [], - "source": [ - "im_coll = (ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')\n", - " .filterBounds(aoi)\n", - " .filterDate(ee.Date('2020-08-01'),ee.Date('2020-08-31'))\n", - " .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\n", - " .filter(ee.Filter.eq('relativeOrbitNumber_start', 15))\n", - " .sort('system:time_start'))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gVWNXUIqI-lC" - }, - "source": [ - "Here are the acquisition times in the collection, formatted with Python's _time_ module:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9pXdq7BbGUpm" - }, - "outputs": [], - "source": [ - "import time\n", - "acq_times = im_coll.aggregate_array('system:time_start').getInfo()\n", - "[time.strftime('%x', time.gmtime(acq_time/1000)) for acq_time in acq_times]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lkU_Topgr1Ul" - }, - "source": [ - "### A ratio image" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "toLdO1Qe9eIf" - }, - "source": [ - "Let's select the first two images and extract the VV bands, clipping them to _aoi\\_sub_," - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cj9Z7thsJa9t" - }, - "outputs": [], - "source": [ - "im_list = im_coll.toList(im_coll.size())\n", - "im1 = ee.Image(im_list.get(0)).select('VV').clip(aoi_sub)\n", - "im2 = ee.Image(im_list.get(1)).select('VV').clip(aoi_sub)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T0p5u2Kn9ufA" - }, - "source": [ - "Now we'll build the ratio of the VV bands and display it\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gLICx5Y0UCvD" - }, - "outputs": [], - "source": [ - "ratio = im1.divide(im2)\n", - "url = ratio.getThumbURL({'min': 0, 'max': 10})\n", - "disp.Image(url=url, width=800)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OAoSUp799_ZK" - }, - "source": [ - "As in the first part of the Tutorial, standard GEE reducers can be used to calculate a histogram, mean and variance of the ratio image:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gLY4C4V8LoLS" - }, - "outputs": [], - "source": [ - "hist = ratio.reduceRegion(ee.Reducer.fixedHistogram(0, 5, 500), aoi_sub).get('VV').getInfo()\n", - "mean = ratio.reduceRegion(ee.Reducer.mean(), aoi_sub).get('VV').getInfo()\n", - "variance = ratio.reduceRegion(ee.Reducer.variance(), aoi_sub).get('VV').getInfo()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AcC98FbvUpmz" - }, - "source": [ - "Here is a plot of the (normalized) histogram using _numpy_ and _matplotlib_:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pMgsrzHHUuSX" - }, - "outputs": [], - "source": [ - "a = np.array(hist)\n", - "x = a[:, 0]\n", - "y = a[:, 1] / np.sum(a[:, 1])\n", - "plt.grid()\n", - "plt.plot(x, y, '.')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2zyPzJOzU16A" - }, - "source": [ - "This looks a bit like the gamma distribution we met in [Part 1](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#pixel_distributions) but is in fact an _F probability distribution_. 
The _F_ distribution is defined as the ratio of two chi square distributions, see [Eq. (1.12)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#speckle), with $m_1$ and $m_2$ degrees of freedom. The above histogram is an $F$ distribution with $m_1=2m$ and $m_2=2m$ degrees of freedom and is given by\n", - " \n", - "$$\n", - "p_{f;2m,2m}(x) = {\\Gamma(2m)\\over \\Gamma(m)^2} x^{m-1}(1+x)^{-2m},\n", - "$$\n", - "\n", - "$$\n", - "\\quad {\\rm mean}(x) = {m\\over m-1},\\tag{2.1}\n", - "$$\n", - "\n", - "$$\n", - "\\quad {\\rm var}(x) = {m(2m-1)\\over (m-1)^2 (m-2)}\n", - "$$\n", - " \n", - "with parameter $m = 5$. We can see this empirically by overlaying the distribution onto the histogram with the help of _scipy.stats.f_. The histogram bucket widths are 0.01 so we have to divide by 100:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2vQjpWmJU9jf" - }, - "outputs": [], - "source": [ - "m = 5\n", - "plt.grid()\n", - "plt.plot(x, y, '.', label='data')\n", - "plt.plot(x, f.pdf(x, 2*m, 2*m) / 100, '-r', label='F-dist')\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B4p0R-O4r7kO" - }, - "source": [ - "Checking the mean and variance, we get approximate agreement" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N6vkv1M7gpzy" - }, - "outputs": [], - "source": [ - "print(mean, m/(m-1))\n", - "print(variance, m*(2*m-1)/(m-1)**2/(m-2))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2g5xTiYMF65a" - }, - "source": [ - "So what is so special about this distribution? When looking for changes between two co-registered Sentinel-1 images acquired at different times, it might seem natural to subtract one from the other and then examine the difference, much as we would do for instance with visual/infrared ground reflectance images. In the case of SAR intensity images this is not a good idea. In the difference of two uncorrelated multilook images $\\langle s_1\\rangle$ and $\\langle s_2\\rangle$ the variances add together and, from Eq. (1.21) in the first part of the Tutorial,\n", - " \n", - "$$\n", - "{\\rm var}(\\langle s_1\\rangle-\\langle s_2\\rangle) = {a_1^2+a_2^2\\over m}, \\tag{2.4}\n", - "$$\n", - " \n", - "where $a_1$ and $a_2$ are mean intensities. So difference pixels in bright areas will have a higher variance than difference pixels in darker areas. It is not possible to set a reliable threshold to determine with a given confidence where change has occurred. \n", - " \n", - "It turns out that the _F_ distributed ratio of the two images which we looked at above is much more informative. For each pixel position in the two images, the quotient $\\langle s_1\\rangle / \\langle s_2\\rangle$ is a _likelihood ratio test statistic_ for deciding whether or not a change has occurred between the two acquisition dates at that position. We will explain what this means below. 
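To make Eq. (2.4) concrete before going back to the imagery, here is a small NumPy simulation (an illustrative sketch using the gamma model of Eq. (2.5), with arbitrary mean intensities): the variance of the difference of two no-change multilook intensities grows with the mean intensity, while the spread of their ratio does not.

```python
import numpy as np

rng = np.random.default_rng(42)
m = 5  # number of looks

def no_change_pair(a, n=100_000):
    # m-look intensities with mean a are gamma distributed with shape m, scale a/m.
    return rng.gamma(m, a / m, n), rng.gamma(m, a / m, n)

for a in (0.25, 4.0):  # a dark and a bright no-change pixel
    s1, s2 = no_change_pair(a)
    print(f'a = {a}: var(s1 - s2) = {np.var(s1 - s2):.3f}, '
          f'var(s1 / s2) = {np.var(s1 / s2):.3f}')
```

The difference variance scales as $2a^2/m$, whereas the ratio statistics are those of the _F_ distribution in Eq. (2.1) regardless of $a$.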
Here for now is the ratio of the two Frankfurt Airport images, this time within the complete _aoi_:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9RYbVUN-G9LR" - }, - "outputs": [], - "source": [ - "im1 = ee.Image(im_list.get(0)).select('VV').clip(aoi)\n", - "im2 = ee.Image(im_list.get(1)).select('VV').clip(aoi)\n", - "ratio = im1.divide(im2)\n", - "\n", - "location = aoi.centroid().coordinates().getInfo()[::-1]\n", - "mp = folium.Map(location=location, zoom_start=12)\n", - "mp.add_ee_layer(ratio,\n", - " {'min': 0, 'max': 20, 'palette': ['black', 'white']}, 'Ratio')\n", - "mp.add_child(folium.LayerControl())\n", - "\n", - "display(mp)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VWFs_0UiK-xs" - }, - "source": [ - "We might guess that the bright pixels here are significant changes, for instance due to aircraft movements on the tarmac or vehicles moving on the highway. Of course ''significant'' doesn't necessarily imply ''interesting''. We already know Frankfurt has a busy airport and that a German Autobahn is always crowded. The question is, how significant are the changes in the statistical sense? Let's now try to answer that question." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8gE7aypYKo0q" - }, - "source": [ - "### Statistical testing\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "k3XO4qP3wNUP" - }, - "source": [ - "A _statistical hypothesis_ is a conjecture about the distributions of one or more measured variables. It might, for instance, be an assertion about the mean of a distribution, or about the equivalence of the variances of two different distributions. We distinguish between _simple_ hypotheses, for which the distributions are completely specified, for example: _the mean of a normal distribution with variance $\\sigma^2$ is $\\mu=0$_, and _composite_ hypotheses, for which this is not the case, e.g., _the mean is $\\mu\\ge 0$_.\n", - "\n", - "In order to test such assertions on the basis of measured values, it is also necessary to formulate _alternative_ hypotheses. To distinguish these from the original assertions, the latter are traditionally called _null_ hypotheses. Thus we might be interested in testing the simple null hypothesis $\\mu = 0$ against the composite alternative hypothesis $\\mu\\ne 0$. An appropriate combination of measurements for deciding whether or not to reject the null hypothesis in favor of its alternative is referred to as a _test statistic_, often denoted by the symbol $Q$. An appropriate _test procedure_ will partition the possible test statistics into two subsets: an acceptance region for the null hypothesis and a rejection region. The latter is customarily referred to as the _critical region_.\n", - "\n", - "Referring to the null hypothesis as $H_0$, there are two kinds of errors which can arise from any test procedure:\n", - "\n", - " - $H_0$ may be rejected when in fact it is true. This is called an _error of the first kind_ and the probability that it will occur is denoted $\\alpha$.\n", - " - $H_0$ may be accepted when in fact it is false, which is called an _error of the second kind_ with probability of occurrence $\\beta$.\n", - "\n", - "The probability of obtaining a value of the test statistic within the critical region when $H_0$ is true is thus $\\alpha$. The probability $\\alpha$ is also referred to as the _level of significance_ of the test or the _probability of a false positive_. 
It is generally the case that the lower the value of $\\alpha$, the higher is the probability $\\beta$ of making a second kind error, so there is always a trade-off. (Judge Roy Bean, from the film of the same name, didn't believe in trade-offs. He hanged all defendants regardless of the evidence. His $\\beta$ was zero, but his $\\alpha$ was rather large.)\n", - "\n", - "At any rate, traditionally, significance levels of 0.01 or 0.05 are often used.\n", - "\n", - "#### The _P_ value\n", - "\n", - "Suppose we determine the test statistic to have the value $q$. The _P value_ is defined as the probability of getting a test statistic $Q$ that is at least as extreme as the one observed given the null hypothesis. What is meant by \"extreme\" depends on how we choose the test statistic. If this probability is small, then the null hypothesis is unlikely. If it is smaller than the prescribed significance level $\\alpha$, then the null hypothesis is rejected." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FqqH9A3OtY2f" - }, - "source": [ - "#### Likelihood Functions\n", - "\n", - "The $m$-look VV intensity bands of the two Sentinel-1 images that we took from the archive have pixel values\n", - "\n", - "$$\n", - "\\langle s\\rangle=\\langle|S_{vv}|^2\\rangle, \\quad {\\rm with\\ mean}\\ a=|S^a_{vv}|^2,\n", - "$$\n", - "\n", - "and are _gamma_ distributed according to [Eq. (1.1)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#pixel_distributions), with parameters $\\alpha=m$ and $\\beta = a/m$. To make the notation a bit simpler, let's write $s = \\langle s \\rangle$, so that the multi-look averaging is understood.\n", - "\n", - "Using subscript $i=1,2$ to refer to the two images, the probability densities are\n", - "\n", - "$$\n", - "p(s_i| a_i) = {1 \\over (a_i/m)^m\\Gamma(m)}s_i^{m-1}e^{-s_i m/a_i},\\quad i=1,2. \\tag{2.5}\n", - "$$\n", - "\n", - "We've left out the number of looks $m$ on the left hand side, since it is the same for both images. \n", - "\n", - "Now let's formulate a null hypothesis, namely that no change has taken place in the signal strength $a = |S^a_{vv}|^2$ between the two acquisitions, i.e.,\n", - "\n", - "$$\n", - "H_0: \\quad a_1=a_2 = a\n", - "$$ \n", - "\n", - "and test it against the alternative hypothesis that a change took place\n", - "\n", - "$$\n", - "H_1: \\quad a_1\\ne a_2.\n", - "$$ \n", - "\n", - "If the null hypothesis is true, then the so-called _likelihood_ for getting the measured pixel intensities $s_1$ and $s_2$ is defined as the product of the probability densities for that value of $a$,\n", - "\n", - "$$\n", - "L_0(a) = p(s_1|a)p(s_2|a) = {1\\over(a/m)^{2m}\\Gamma(m)^2}(s_1s_2)^{m-1}e^{-(s_1+s_2)m/a}. \\tag{2.6}\n", - "$$\n", - "\n", - "Taking the product of the probability densities like this is justified by the fact that the measurements $s_1$ and $s_2$ are independent.\n", - "\n", - "The _maximum likelihood_ is obtained by maximizing $L_0(a)$ with respect to $a$,\n", - "\n", - "$$\n", - "L_0(\\hat a) = p(s_1|\\hat a)p(s_2|\\hat a), \\quad \\hat a = \\arg\\max_a L_0(a). 
\n", - "$$\n", - "\n", - "We can get $\\hat a$ simply by solving the equation\n", - "\n", - "$$\n", - "{d L_0(a)\\over da} = 0\n", - "$$\n", - "\n", - "for which we derive the maximum likelihood estimate (an easy exercise)\n", - "\n", - "$$\n", - "\\hat a = {s_1 + s_2 \\over 2}.\n", - "$$\n", - "\n", - "Makes sense: the only information we have is $s_1$ and $s_2$, so, if there was no change, our best estimate of the intensity $a$ is to take the average. Thus, substituting this value into Eq. (2.6), the maximum likelihood under $H_0$ is\n", - "\n", - "$$\n", - "L_0(\\hat a) = {1\\over ((s_1+s_2)/2m)^{2m}\\Gamma(m)^2}(s_1s_2)^{m-1}e^{-2m}. \\tag{2.7}\n", - "$$\n", - "\n", - "Similarly, under the alternative hypothesis $H_1$, the maximum likelihood is\n", - "\n", - "$$\n", - "L_1(\\hat a_1,\\hat a_2) = p(s_1|\\hat a_1)p(s_2|\\hat a_2)\\quad \\hat a_1, \\hat a_2 = \\arg\\max_{a_1,a_2} L_1(a_1,a_2). \n", - "$$\n", - "\n", - "Again, setting derivatives equal to zero, we get for $H_1$\n", - "\n", - "$$\n", - "\\hat a_1 = s_1, \\quad \\hat a_2 = s_2,\n", - "$$\n", - "\n", - "and the maximum likelihood\n", - "\n", - "$$\n", - "L_1(\\hat a_1,\\hat a_2) = {m^{2m}\\over \\Gamma(m)^2}s_1s_2 e^{-2m}. \\tag{2.8}\n", - "$$\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ki2RrEjgTb39" - }, - "source": [ - "### The Likelihood Ratio Test\n", - " \n", - "The theory of statistical testing specifies methods for\n", - "determining the most appropriate test procedure, one which minimizes the probability $\\beta$ of an error of the second kind for a fixed level of significance $\\alpha$. Rather than giving a general definition, we state the appropriate test for our case: \n", - " \n", - "We should reject the null hypothesis if the _ratio_ of the two likelihoods satisfies the inequality\n", - " \n", - "$$\n", - "Q = {L_0(\\hat a)\\over L_1(\\hat a_1,\\hat a_2)} \\le k \\tag{2.9}\n", - "$$\n", - " \n", - "for some appropriately small value of threshold $k$.\n", - " \n", - "This definition simply reflects the fact that, if the null hypothesis is true, the maximum likelihood when $a_1=a_2$ should be close to the maximum likelihood without that restriction, given the measurements $s_1$ and $s_2$. Therefore, if the likelihood ratio is small, (less than or equal to some small value $k$), then $H_0$ should be rejected. \n", - " \n", - "With some (very) simply algebra, Eq. (2.9) evaluates to\n", - " \n", - "$$\n", - "Q = \\left[2^2 \\left( s_1s_2\\over (s_1+s_2)^2\\right)\\right]^m \\le k \\tag{2.10}\n", - "$$\n", - " \n", - "using (2.7) and (2.8). This is the same as saying\n", - " \n", - "$$\n", - "{s_1s_2\\over (s_1+s_2)^2} \\le k'\\quad {\\rm or}\\quad {(s_1+s_2)^2\\over s_1s_2}\\ge k''\\quad {\\rm or}\\quad {s_1\\over s_2}+{s_2\\over s_1}\\ge k''-2\n", - "$$\n", - " \n", - "where $k',k''$ depend on $k$. The last inequality is satisfied if either term is small enough:\n", - " \n", - "$$\n", - "{s_1\\over s_2} < c_1 \\quad {\\rm or}\\quad {s_2\\over s_1} < c_2 \\tag{2.11}\n", - "$$\n", - " \n", - "again for some appropriate threshold $c_1$ and $c_2$ which depend on $k''$. \n", - " \n", - "So the ratio image $s_1/s_2$ that we generated above is indeed a _Likelihood Ratio Test (LRT) statistic_, one of two possible. We'll call it $Q_1 = s_1/s_2$ and the other one $Q_2 = s_2/s_1$. 
The former tests for a significant increase in intensity between times $t_1$ and $t_2$, the latter for a significant decrease.\n", - " \n", - "Fine, but where does the _F_ distribution come in?\n", - " \n", - "Both $s_1$ and $s_2$ are gamma distributed\n", - " \n", - "$$\n", - "p(s\\mid a) = {1\\over (a/m)^m\\Gamma(m)}s^{m-1}e^{-sm/a}.\n", - "$$\n", - " \n", - "Let $z = 2sm/a$. Then\n", - " \n", - "$$\n", - "p(z\\mid a) = p(s\\mid a)\\left |{ds\\over dz}\\right | = {1\\over (a/m)^m\\Gamma(m)}\\left({za\\over 2m}\\right)^{m-1}\\left({a\\over 2m}\\right) = {1\\over 2^m\\Gamma(m)}z^{m-1}e^{-z/2}.\n", - "$$\n", - " \n", - "Comparing this with [Eq. (1.12)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#speckle) from the first part of the Tutorial, we see that $z$ is chi square distributed with $2m$ degrees of freedom, and therefore so are the variables $2s_1m/a$ and $2s_2m/a$. The quotients $s_1/s_2$ and $s_2/s_1$ are thus ratios of two chi square distributed variables with $2m$ degrees of freedom. They therefore have the _F_ distribution of Eq. (2.1).\n", - " \n", - "In order to decide the test for $Q_1$, we need the _P_ value for a measurement $q_1$ of the statistic. Recall that this is the probability of getting a result at least as extreme as the one measured under the null hypothesis. So in this case\n", - " \n", - "$$\n", - "P_1 = {\\rm Prob}(Q_1\\le q_1\\mid H_0), \\tag{2.12}\n", - "$$\n", - " \n", - "which we can calculate from the percentiles of the _F_ distribution, Eq. (2.1). Then if $P_1\\le \\alpha/2$ we reject $H_0$ and conclude with significance $\\alpha/2$ that a change occurred. We do the same test for $Q_2$, so that the combined significance is $\\alpha$." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kocUkbzZ3vkQ" - }, - "source": [ - "Now we can make a change map for the Frankfurt Airport for the two acquisitions, August 5 and August 11, 2020. We want to see quite large changes associated primarily with airplane and vehicle movements, so we will set the significance generously low to $\\alpha = 0.001$. We will also distinguish the direction of change and mask out the no-change pixels:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DXpGkHFrp4Q3" - }, - "outputs": [], - "source": [ - "# Decision threshold alpha/2:\n", - "dt = f.ppf(0.0005, 2*m, 2*m)\n", - "\n", - "# LRT statistics.\n", - "q1 = im1.divide(im2)\n", - "q2 = im2.divide(im1)\n", - "\n", - "# Change map with 0 = no change, 1 = decrease, 2 = increase in intensity.\n", - "c_map = im1.multiply(0).where(q2.lt(dt), 1)\n", - "c_map = c_map.where(q1.lt(dt), 2)\n", - "\n", - "# Mask no-change pixels.\n", - "c_map = c_map.updateMask(c_map.gt(0))\n", - "\n", - "# Display map with red for increase and blue for decrease in intensity.\n", - "location = aoi.centroid().coordinates().getInfo()[::-1]\n", - "mp = folium.Map(\n", - " location=location, tiles='cartodb positron',\n", - " zoom_start=13)\n", - "folium.TileLayer('OpenStreetMap').add_to(mp)\n", - "mp.add_ee_layer(ratio,\n", - " {'min': 0, 'max': 20, 'palette': ['black', 'white']}, 'Ratio')\n", - "mp.add_ee_layer(c_map,\n", - " {'min': 0, 'max': 2, 'palette': ['black', 'blue', 'red']},\n", - " 'Change Map')\n", - "mp.add_child(folium.LayerControl())\n", - "\n", - "display(mp)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YSfPdRftGdrW" - }, - "source": [ - "Most changes are within the airport or on the Autobahn. 
Barge movements on the Main River (upper left hand corner) are also signaled as significant changes. Note that the 'red' changes (significant increases in intensity) do not show up in the 'ratio' overlay, which displays $s_1/s_2$." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4mYnuTu4rpcn" - }, - "source": [ - "### Bivariate change detection" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mCZbWa7gEaHH" - }, - "source": [ - "Rather than analyzing the VV and VH bands individually, it would make more sense to treat them together, and that is what we will now do. It is convenient to work with the covariance matrix form for measured intensities that we introduce in Part 1, see [Eq.(1.6a)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#single_look_complex_slc_sar_measurements). Again with the aim of keeping the notation simple, define\n", - "\n", - "$$\n", - "\\pmatrix{ s_i & 0\\cr 0 & r_i} = \\pmatrix{\\langle|S_{vv}|^2\\rangle_i & 0 \\cr 0 & \\langle|S_{vh}|^2\\rangle_i}, \\quad {\\rm with\\ means}\\quad a_i = \\langle|S^{a_i}_{vv}|^2\\rangle, \\quad b_i = \\langle|S^{b_i}_{vh}|^2\\rangle \\tag{2.13}\n", - "$$\n", - "\n", - "for the two acquisition times $t_i,\\ i=1,2$. \n", - "\n", - "Under $H_0$ we have $a_1=a_2=a$ and $b_1=b_2=b$. Assuming independence of $s_i$ and $r_i$, the likelihood function is the product of the four gamma distributions\n", - "\n", - "$$\n", - "L_0(a,b) = p(s_1\\mid a)p(r_1\\mid b)p(s_2\\mid a)p(r_2\\mid b).\n", - "$$\n", - "\n", - "Under $H_1$,\n", - "\n", - "$$\n", - "L_1(a_1,b_1,a_2,b_2) = p(s_1\\mid a_1)p(r_1\\mid b_1)p(s_2\\mid a_2)p(r_2\\mid b_2).\n", - "$$\n", - "\n", - "With maximum likelihood estimates under $H_0$ \n", - "\n", - "$$\n", - "\\hat a = (s_1+s_2)/2\\quad {\\rm and}\\quad \\hat b = (r_1+r_2)/2\n", - "$$ \n", - "\n", - "for the parameters and some simple algebra, we get \n", - "\n", - "$$\n", - "L_0(\\hat a,\\hat b) = {(2m)^{4m}\\over (s_1+s_2)^{2m}(r_1+r_2)^{2m}\\Gamma(m)^4}s_1r_1s_2r_2e^{-4m}. \\tag{2.14}\n", - "$$ \n", - "\n", - "Similarly with $\\hat a_1=s_1,\\ \\hat b_1=r_1,\\ \\hat a_2=s_2,\\ \\hat b_2=r_2$, we calculate\n", - "\n", - "$$\n", - "L_1(\\hat a_1,\\hat b_1,\\hat a_2,\\hat b_2) = {m^{4m}\\over s_1r_1s_2r_2}e^{-4m}.\n", - "$$\n", - "\n", - "The likelihood test statistic in then\n", - "\n", - "$$\n", - "Q = {L_0(\\hat a,\\hat b)\\over L_1(\\hat a_1,\\hat b_1,\\hat a_2,\\hat b_2)}={2^4(s_1r_1s_2r_2)^m\\over (s_1+s_2)^{2m}(r_1+r_2)^{2m}}.\n", - "$$\n", - "\n", - "Writing this in terms of the covariance matrix representation,\n", - "\n", - "$$\n", - "c_i = \\pmatrix{s_i & 0\\cr 0 & r_i},\\quad i=1,2,\n", - "$$\n", - "\n", - "we derive, finally, the likelihood ratio test\n", - "\n", - "$$\n", - "Q = \\left[2^4\\pmatrix{|c_1| |c_2|\\over |c_1+c_2|^2 }\\right]^m \\le k, \\tag{2.15}\n", - "$$\n", - "\n", - "where $|\\cdot|$ indicates the matrix determinant, $|c_i|=s_ir_i$. \n", - "\n", - "So far so good. But in order to determine _P_ values, we need the probability distribution of $Q$. This time we have no idea how to obtain it. Here again, statistical theory comes to our rescue.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hWU-gu4W3p-_" - }, - "source": [ - "Let $\\Theta$ be the parameter space for the LRT. In our example it is \n", - "$$\n", - "\\Theta = \\{ a_1,b_1,a_2,b_2\\}\n", - "$$ \n", - "and has $d=4$ dimensions. 
Under the null hypothesis the parameter space is restricted by the conditions $a=a_1=a_2$ and $b=b_1=b_2$ to \n", - "$$\n", - "\\Theta_0 = \\{ a,b\\}\n", - "$$ \n", - "with $d_0=2$ dimensions. According to [Wilks' Theorem](https://en.wikipedia.org/wiki/Wilks%27_theorem), as the number of measurements determining the LRT statistic $Q$ approaches $\\infty$, the test statistic $-2\\log Q$ approaches a chi square distribution with $d-d_0=2$ degrees of freedom. (Recall that, in order to determine the matrices $c_1$ and $c_2$, five individual measurements were averaged or multi-looked.) So rather than working with $Q$ directly, we use $-2\\log Q$ instead and hope that Wilk's theorem is a good enough approximation for our case.\n", - "\n", - "In order to check if this is so, we just have to program \n", - "\n", - "$$\n", - "-2\\log Q = (\\log{|c_1|}+\\log{|c_2|}-2\\log{|c_1+c_2|}+4\\log{2})(-2m)\n", - "$$ \n", - "\n", - "in GEE-ese:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5HKdnWr8YU1U" - }, - "outputs": [], - "source": [ - "def det(im):\n", - " return im.expression('b(0) * b(1)')\n", - "\n", - "# Number of looks.\n", - "m = 5\n", - "\n", - "im1 = ee.Image(im_list.get(0)).select('VV', 'VH').clip(aoi)\n", - "im2 = ee.Image(im_list.get(1)).select('VV', 'VH').clip(aoi)\n", - "\n", - "m2logQ = det(im1).log().add(det(im2).log()).subtract(\n", - " det(im1.add(im2)).log().multiply(2)).add(4*np.log(2)).multiply(-2*m)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Wmprc_di-PBz" - }, - "source": [ - "and then plot its histogram, comparing it with the chi square distribution _scipy.stats.chi2.pdf()_ with two degrees of freedom:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EVAjH_uR_kkg" - }, - "outputs": [], - "source": [ - "hist = m2logQ.reduceRegion(\n", - " ee.Reducer.fixedHistogram(0, 20, 200), aoi).get('VV').getInfo()\n", - "a = np.array(hist)\n", - "x = a[:, 0]\n", - "y = a[:, 1] / np.sum(a[:, 1])\n", - "plt.plot(x, y, '.', label='data')\n", - "plt.plot(x, chi2.pdf(x, 2)/10, '-r', label='chi square')\n", - "plt.legend()\n", - "plt.grid()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KvOXWaRhCQ8G" - }, - "source": [ - "Looks pretty good. Note now that a small value of the LRT $Q$ in Eq. (2.15) corresponds to a large value of $-2\\log{Q}$. Therefore the _P_ value for a measurement $q$ is now the probability of getting the value $-2\\log{q}$\n", - "or higher,\n", - "$$\n", - "P = {\\rm Prob}(-2\\log{Q} \\ge -2\\log{q}) = 1 - {\\rm Prob}(-2\\log{Q} < -2\\log{q}).\n", - "$$\n", - "\n", - "So let's try out our bivariate change detection procedure, this time on an agricultural scene where we expect to see larger regions of change." 
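Before doing so, a quick Monte Carlo check of the Wilks approximation (a sketch with arbitrary no-change mean intensities, independent of Earth Engine): simulate dual pol pixels from the gamma model, form $-2\log Q$ from the expression above with $|c_i| = s_i r_i$, and compare its tail probabilities with those of the chi square distribution with 2 degrees of freedom.

```python
import numpy as np
from scipy.stats import chi2

rng = np.random.default_rng(0)
m = 5               # number of looks
a, b = 1.0, 0.3     # arbitrary no-change VV and VH mean intensities
n = 100_000

s1, s2 = rng.gamma(m, a / m, (2, n))
r1, r2 = rng.gamma(m, b / m, (2, n))

# -2 log Q, with |c_i| = s_i * r_i and |c_1 + c_2| = (s_1 + s_2)(r_1 + r_2).
det1, det2, det12 = s1 * r1, s2 * r2, (s1 + s2) * (r1 + r2)
m2logq = -2 * m * (np.log(det1) + np.log(det2)
                   - 2 * np.log(det12) + 4 * np.log(2))

for x in (2.0, 4.6, 9.2):
    print(f'threshold {x}: empirical tail {np.mean(m2logq > x):.3f}, '
          f'chi2(2) tail {chi2.sf(x, 2):.3f}')
```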
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UHYIAceXKNjG" - }, - "outputs": [], - "source": [ - "geoJSON ={\n", - " \"type\": \"FeatureCollection\",\n", - " \"features\": [\n", - " {\n", - " \"type\": \"Feature\",\n", - " \"properties\": {},\n", - " \"geometry\": {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [\n", - " -98.2122802734375,\n", - " 49.769291532628515\n", - " ],\n", - " [\n", - " -98.00559997558594,\n", - " 49.769291532628515\n", - " ],\n", - " [\n", - " -98.00559997558594,\n", - " 49.88578690918283\n", - " ],\n", - " [\n", - " -98.2122802734375,\n", - " 49.88578690918283\n", - " ],\n", - " [\n", - " -98.2122802734375,\n", - " 49.769291532628515\n", - " ]\n", - " ]\n", - " ]\n", - " }\n", - " }\n", - " ]\n", - "}\n", - "coords = geoJSON['features'][0]['geometry']['coordinates']\n", - "aoi1 = ee.Geometry.Polygon(coords)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IpOZqWqUROoG" - }, - "source": [ - "This is a mixed agricultural/forest area in southern Manitoba, Canada. We'll gather two images, one from the beginning of August and one from the beginning of September, 2018. A lot of harvesting takes place in this interval, so we expect some extensive changes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DmQfIKvmLdCc" - }, - "outputs": [], - "source": [ - "im1 = ee.Image(ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')\n", - " .filterBounds(aoi1)\n", - " .filterDate(ee.Date('2018-08-01'), ee.Date('2018-08-31'))\n", - " .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\n", - " .filter(ee.Filter.eq('relativeOrbitNumber_start', 136))\n", - " .first()\n", - " .clip(aoi1))\n", - "im2 = ee.Image(ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT').filterBounds(aoi1)\n", - " .filterDate(ee.Date('2018-09-01'), ee.Date('2018-09-30'))\n", - " .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\n", - " .filter(ee.Filter.eq('relativeOrbitNumber_start', 136))\n", - " .first()\n", - " .clip(aoi1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qaeCqVQhEe9F" - }, - "source": [ - "Here are the acquisition times:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EZjsAxhKKmZs" - }, - "outputs": [], - "source": [ - "acq_time = im1.get('system:time_start').getInfo()\n", - "print( time.strftime('%x', time.gmtime(acq_time/1000)) )\n", - "acq_time = im2.get('system:time_start').getInfo()\n", - "print( time.strftime('%x', time.gmtime(acq_time/1000)) )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5T9VFW1hSZMR" - }, - "source": [ - "Fortunately it is possible to map the chi square cumulative distribution function over an _ee.Image()_ so that a _P_ value image can be calculated directly. This wasn't possible in the single band case, as the _F_ cumulative distribution is not available on the GEE. 
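As a brief client-side cross-check (a small added aside, nothing Earth Engine specific): the chi square CDF with $df$ degrees of freedom is the regularized lower incomplete gamma function $P(df/2,\ x/2)$, which is the identity the `chi2cdf()` helper defined just below relies on.

```python
import numpy as np
from scipy.stats import chi2
from scipy.special import gammainc

x = np.array([0.5, 2.0, 5.0, 9.2])
print(chi2.cdf(x, df=2))       # chi square CDF with 2 degrees of freedom
print(gammainc(2 / 2, x / 2))  # regularized lower incomplete gamma P(1, x/2)
```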
Here are the _P_ values:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kmU0Qt8GK8CB" - }, - "outputs": [], - "source": [ - "def chi2cdf(chi2, df):\n", - " ''' Chi square cumulative distribution function for df degrees of freedom\n", - " using the built-in incomplete gamma function gammainc() '''\n", - " return ee.Image(chi2.divide(2)).gammainc(ee.Number(df).divide(2))\n", - "\n", - "# The observed test statistic image -2logq.\n", - "m2logq = det(im1).log().add(det(im2).log()).subtract(\n", - " det(im1.add(im2)).log().multiply(2)).add(4*np.log(2)).multiply(-2*m)\n", - "\n", - "# The P value image prob(m2logQ > m2logq) = 1 - prob(m2logQ < m2logq).\n", - "p_value = ee.Image.constant(1).subtract(chi2cdf(m2logq, 2))\n", - "\n", - "# Project onto map.\n", - "location = aoi1.centroid().coordinates().getInfo()[::-1]\n", - "mp = folium.Map(location=location, zoom_start=12)\n", - "mp.add_ee_layer(p_value,\n", - " {'min': 0,'max': 1, 'palette': ['black', 'white']}, 'P-value')\n", - "mp.add_child(folium.LayerControl())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZLQlE1IlM6Dj" - }, - "source": [ - "The uniformly dark areas correspond to small or vanishing _P_ values and signify change. The bright areas correspond to no change. Why they are not uniformly bright will be explained below. Now we set a significance threshold of $\\alpha=0.01$ and display the significant changes, whereby 1% of them will be false positives. For reference we also show the 2018 [Canada AAFC Annual Crop Inventory](https://developers.google.com/earth-engine/datasets/catalog/AAFC_ACI) map, which is available as a GEE collection:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lInJMXCyTlaF" - }, - "outputs": [], - "source": [ - "c_map = p_value.multiply(0).where(p_value.lt(0.01), 1)\n", - "\n", - "crop2018 = (ee.ImageCollection('AAFC/ACI')\n", - " .filter(ee.Filter.date('2018-01-01', '2018-12-01'))\n", - " .first()\n", - " .clip(aoi1))\n", - "\n", - "mp = folium.Map(location=location, zoom_start=12)\n", - "mp.add_ee_layer(crop2018, {min: 0, max: 255}, 'crop2018')\n", - "mp.add_ee_layer(c_map.updateMask(\n", - " c_map.gt(0)), {'min': 0, 'max': 1, 'palette': ['black', 'red']}, 'c_map')\n", - "mp.add_child(folium.LayerControl())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vATDsxFOOESB" - }, - "source": [ - " The major crops in the scene are soybeans (dark brown), oats (light brown), canola (light green), corn (light yellow) and winter wheat (dark gray). The wooded areas exhibit little change, while canola has evidently been extensively harvested in the interval." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "COkFrH11bpNI" - }, - "source": [ - "#### A note on _P_ values\n", - "Because small _P_ values are indicative of change, it is tempting to say that, the larger the _P_ value, the higher the probability of no change. Or more explicitly, the _P_ value is itself the no change probability. Let's see why this is false. Below we choose a wooded area of the agricultural scene where few significant changes are to be expected and use it to subset the _P_ value image. 
Then we plot the histogram of the subset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "761UOg0UCEmQ" - }, - "outputs": [], - "source": [ - "geoJSON ={\n", - " \"type\": \"FeatureCollection\",\n", - " \"features\": [\n", - " {\n", - " \"type\": \"Feature\",\n", - " \"properties\": {},\n", - " \"geometry\": {\n", - " \"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " [\n", - " [\n", - " -98.18550109863281,\n", - " 49.769735012247885\n", - " ],\n", - " [\n", - " -98.13949584960938,\n", - " 49.769735012247885\n", - " ],\n", - " [\n", - " -98.13949584960938,\n", - " 49.798109268622\n", - " ],\n", - " [\n", - " -98.18550109863281,\n", - " 49.798109268622\n", - " ],\n", - " [\n", - " -98.18550109863281,\n", - " 49.769735012247885\n", - " ]\n", - " ]\n", - " ]\n", - " }\n", - " }\n", - " ]\n", - "}\n", - "coords = geoJSON['features'][0]['geometry']['coordinates']\n", - "aoi1_sub = ee.Geometry.Polygon(coords)\n", - "hist = p_value.reduceRegion(ee.Reducer.fixedHistogram(0, 1, 100), aoi1_sub).get('constant').getInfo()\n", - "a = np.array(hist)\n", - "x = a[:,0]\n", - "y = a[:,1]/np.sum(a[:,1])\n", - "plt.plot(x, y, '.b', label='p-value')\n", - "plt.ylim(0, 0.05)\n", - "plt.grid()\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pz4ipvMKPmxK" - }, - "source": [ - "So the P values of no-change measurements are uniformly distributed over $[0, 1]$ (the excess of small _P_ values at the left can be ascribed to genuine changes within the polygon). A large _P_ value is no more indicative of no change than a small one. Of course it has to be this way. When, for example, we set a significance level of 5%, then the fraction of false positives, i.e., the fraction of _P_ values smaller than 0.05 given $H_0$, must also be 5%. This accounts for the noisy appearance of the _P_ value image in the no-change regions." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-LnZpAKQcRz5" - }, - "source": [ - "#### Change direction: the Loewner order\n", - "What about the direction of change in the bivariate case? This is less clear, as we can have the situation where the VV intensity gets larger and the VH smaller from time $t_1$ to $t_2$, or vice versa. When we are dealing with the C2 covariance matrix representation of SAR imagery, see Eq. (2.13), a characterization of change can be made as follows [(Nielsen et al. (2019))](https://ieeexplore.ieee.org/document/8736751): For each significantly changed pixel, we determine the difference $C2_{t_2}-C2_{t_1}$ and examine its so-called _definiteness_, also known as the _Loewner order_ of the change. A matrix is said to be _positive definite_ if all of its eigenvalues are positive, _negative definite_ if they are all negative, otherwise _indefinite_. In the case of the $2\\times 2$ diagonal matrices that we are concerned with the eigenvalues are just the two diagonal elements themselves, so determining the Loewner order is trivial. 
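For instance, a tiny plain-Python illustration (a sketch added here, not from the notebook) of classifying the diagonal difference matrix by the signs of its two diagonal entries:

```python
def loewner_order(d_vv, d_vh):
    """Loewner order of the 2x2 diagonal difference matrix diag(d_vv, d_vh)."""
    # The eigenvalues of a diagonal matrix are its diagonal entries.
    if d_vv > 0 and d_vh > 0:
        return 'positive definite'   # intensity increased in both bands
    if d_vv < 0 and d_vh < 0:
        return 'negative definite'   # intensity decreased in both bands
    return 'indefinite'              # mixed directions of change

print(loewner_order(0.2, 0.05), loewner_order(-0.1, -0.3), loewner_order(0.2, -0.05))
```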
For full $2\\times 2$ dual pol or $3\\times 3$ quad pol SAR imagery, devising an efficient way to determine the Loewner order is more difficult, see [Nielsen (2019)](https://ieeexplore.ieee.org/document/8913617).\n", - "\n", - "So let's include the Loewner order in our change map:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FOL0V1DNsCqH" - }, - "outputs": [], - "source": [ - "c_map = p_value.multiply(0).where(p_value.lt(0.01), 1)\n", - "diff = im2.subtract(im1)\n", - "d_map = c_map.multiply(0) # Initialize the direction map to zero.\n", - "d_map = d_map.where(det(diff).gt(0), 2) # All pos or neg def diffs are now labeled 2.\n", - "d_map = d_map.where(diff.select(0).gt(0), 3) # Re-label pos def (and label some indef) to 3.\n", - "d_map = d_map.where(det(diff).lt(0), 1) # Label all indef to 1.\n", - "c_map = c_map.multiply(d_map) # Re-label the c_map, 0*X = 0, 1*1 = 1, 1*2= 2, 1*3 = 3." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MVh9eU6V3bVF" - }, - "source": [ - "Now we display the changes, with positive definite red, negative definite blue, and indefinite yellow:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "b8_ggucVvOuh" - }, - "outputs": [], - "source": [ - "mp = folium.Map(location=location, zoom_start=12)\n", - "mp.add_ee_layer(crop2018, {min: 0, max: 255}, 'crop2018')\n", - "mp.add_ee_layer(\n", - " c_map.updateMask(c_map.gt(0)), {\n", - " 'min': 0,\n", - " 'max': 3,\n", - " 'palette': ['black', 'yellow', 'blue', 'red']\n", - " }, 'c_map')\n", - "mp.add_child(folium.LayerControl())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ghLYSYeLSj0P" - }, - "source": [ - "The more or less compact blue changes indicate a decrease in reflectivity in both VV and VH bands, and correspond to crop harvesting (especially canola).\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RvQO9S0W8UEQ" - }, - "source": [ - "### Outlook\n", - "We have now covered the subject of bitemporal change detection with GEE Sentinel-1 imagery. The beauty of GEE is that it is trivially easy to gather arbitrarily long time series of S1 images from the archive, all with revisit times of 6 or 12 days depending on whether one or both satellites are collecting data. The next part of the Tutorial will generalize the techniques we have learned so far to treat multitemporal change detection.\n", - "\n", - "### Oh, and one more thing ...\n", - "\n", - "We didn't mention it above, but note the similarity between Eq. (2.10) and Eq. (2.15). To go from the monovariate LRT to the bivariate LRT, we simply replace the product of intensities $s_1s_2$ by the product of determinants $|c_1||c_2|$, the sum $s_1+s_2$ by $|c_1+c_2|$ and the factor $2^{2}$ by $2^4=2^{2\\cdot2}$. This observation will come in handy in Part 3." 
- ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "Detecting Changes in Sentinel-1 Imagery (Part 2)", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 1 + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8kdsGkYJXXKc" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The Earth Engine Community Authors { display-mode: \"form\" }\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l18M9_r5XmAQ" + }, + "source": [ + "# Detecting Changes in Sentinel-1 Imagery (Part 2)\n", + "Author: mortcanty\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U7i55vr_aKCB" + }, + "source": [ + "### Run me first\n", + "\n", + "Run the following cell to initialize the API. The output will contain instructions on how to grant this notebook access to Earth Engine using your account." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XeFsiSp2aDL6" + }, + "outputs": [], + "source": [ + "import ee\n", + "\n", + "# Trigger the authentication flow.\n", + "ee.Authenticate()\n", + "\n", + "# Initialize the library.\n", + "ee.Initialize(project='my-project')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VOf_UnIcZKBJ" + }, + "source": [ + "### Datasets and Python modules\n", + "One [dataset](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S1_GRD) will be used in the tutorial:\n", + "\n", + "- COPERNICUS/S1_GRD_FLOAT\n", + " - Sentinel-1 ground range detected images\n", + "\n", + "The following cell imports some python modules which we will be using as we go along and enables inline graphics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JR0cxCpeIxoY" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from scipy.stats import norm, gamma, f, chi2\n", + "import IPython.display as disp\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eelxHh2qc6xg" + }, + "source": [ + "And to make use of interactive graphics, we import the _folium_ package:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VIiyf6azf4mU" + }, + "outputs": [], + "source": [ + "# Import the Folium library.\n", + "import folium\n", + "\n", + "# Define a method for displaying Earth Engine image tiles to folium map.\n", + "def add_ee_layer(self, ee_image_object, vis_params, name):\n", + " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", + " folium.raster_layers.TileLayer(\n", + " tiles = map_id_dict['tile_fetcher'].url_format,\n", + " attr = 'Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", + " name = name,\n", + " overlay = True,\n", + " control = True\n", + " ).add_to(self)\n", + "\n", + "# Add EE drawing method to folium.\n", + "folium.Map.add_ee_layer = add_ee_layer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AfDTVBnvu5un" + }, + "source": [ + "## Part 2. Hypothesis testing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r-GuxE6lJHHx" + }, + "source": [ + "We continue from [Part 1](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1) of the Tutorial with the area of interest _aoi_ covering the Frankfurt International Airport and a subset _aoi\\_sub_ consisting of uniform pixels within a forested region." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Hww9JAK0JgFm" + }, + "outputs": [], + "source": [ + "geoJSON = {\n", + " \"type\": \"FeatureCollection\",\n", + " \"features\": [\n", + " {\n", + " \"type\": \"Feature\",\n", + " \"properties\": {},\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [\n", + " 8.473892211914062,\n", + " 49.98081240937428\n", + " ],\n", + " [\n", + " 8.658599853515625,\n", + " 49.98081240937428\n", + " ],\n", + " [\n", + " 8.658599853515625,\n", + " 50.06066538593667\n", + " ],\n", + " [\n", + " 8.473892211914062,\n", + " 50.06066538593667\n", + " ],\n", + " [\n", + " 8.473892211914062,\n", + " 49.98081240937428\n", + " ]\n", + " ]\n", + " ]\n", + " }\n", + " }\n", + " ]\n", + "}\n", + "coords = geoJSON['features'][0]['geometry']['coordinates']\n", + "aoi = ee.Geometry.Polygon(coords)\n", + "geoJSON = {\n", + " \"type\": \"FeatureCollection\",\n", + " \"features\": [\n", + " {\n", + " \"type\": \"Feature\",\n", + " \"properties\": {},\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [\n", + " 8.534317016601562,\n", + " 50.021637833966786\n", + " ],\n", + " [\n", + " 8.530540466308594,\n", + " 49.99780882512238\n", + " ],\n", + " [\n", + " 8.564186096191406,\n", + " 50.00663576154257\n", + " ],\n", + " [\n", + " 8.578605651855469,\n", + " 50.019431940583104\n", + " ],\n", + " [\n", + " 8.534317016601562,\n", + " 50.021637833966786\n", + " ]\n", + " ]\n", + " ]\n", + " }\n", + " }\n", + " ]\n", + "}\n", + "coords = geoJSON['features'][0]['geometry']['coordinates']\n", + "aoi_sub = ee.Geometry.Polygon(coords)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C6rM63_lTqJ9" + }, + "source": [ + "This time we filter the S1 archive to get an image collection consisting of two images acquired in the month of August, 2020. Because we are interested in change detection, it is essential that the local incidence angles be the same in both images. 
So now we specify both the orbit pass (ASCENDING) as well the relative orbit number (15):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ALF5YkahTvRl" + }, + "outputs": [], + "source": [ + "im_coll = (ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')\n", + " .filterBounds(aoi)\n", + " .filterDate(ee.Date('2020-08-01'),ee.Date('2020-08-31'))\n", + " .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\n", + " .filter(ee.Filter.eq('relativeOrbitNumber_start', 15))\n", + " .sort('system:time_start'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gVWNXUIqI-lC" + }, + "source": [ + "Here are the acquisition times in the collection, formatted with Python's _time_ module:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9pXdq7BbGUpm" + }, + "outputs": [], + "source": [ + "import time\n", + "acq_times = im_coll.aggregate_array('system:time_start').getInfo()\n", + "[time.strftime('%x', time.gmtime(acq_time/1000)) for acq_time in acq_times]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lkU_Topgr1Ul" + }, + "source": [ + "### A ratio image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "toLdO1Qe9eIf" + }, + "source": [ + "Let's select the first two images and extract the VV bands, clipping them to _aoi\\_sub_," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cj9Z7thsJa9t" + }, + "outputs": [], + "source": [ + "im_list = im_coll.toList(im_coll.size())\n", + "im1 = ee.Image(im_list.get(0)).select('VV').clip(aoi_sub)\n", + "im2 = ee.Image(im_list.get(1)).select('VV').clip(aoi_sub)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T0p5u2Kn9ufA" + }, + "source": [ + "Now we'll build the ratio of the VV bands and display it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gLICx5Y0UCvD" + }, + "outputs": [], + "source": [ + "ratio = im1.divide(im2)\n", + "url = ratio.getThumbURL({'min': 0, 'max': 10})\n", + "disp.Image(url=url, width=800)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OAoSUp799_ZK" + }, + "source": [ + "As in the first part of the Tutorial, standard GEE reducers can be used to calculate a histogram, mean and variance of the ratio image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gLY4C4V8LoLS" + }, + "outputs": [], + "source": [ + "hist = ratio.reduceRegion(ee.Reducer.fixedHistogram(0, 5, 500), aoi_sub).get('VV').getInfo()\n", + "mean = ratio.reduceRegion(ee.Reducer.mean(), aoi_sub).get('VV').getInfo()\n", + "variance = ratio.reduceRegion(ee.Reducer.variance(), aoi_sub).get('VV').getInfo()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AcC98FbvUpmz" + }, + "source": [ + "Here is a plot of the (normalized) histogram using _numpy_ and _matplotlib_:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pMgsrzHHUuSX" + }, + "outputs": [], + "source": [ + "a = np.array(hist)\n", + "x = a[:, 0]\n", + "y = a[:, 1] / np.sum(a[:, 1])\n", + "plt.grid()\n", + "plt.plot(x, y, '.')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2zyPzJOzU16A" + }, + "source": [ + "This looks a bit like the gamma distribution we met in [Part 1](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#pixel_distributions) but is in fact an _F probability distribution_. 
The _F_ distribution is defined as the ratio of two chi square distributions, see [Eq. (1.12)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#speckle), with $m_1$ and $m_2$ degrees of freedom. The above histogram is an $F$ distribution with $m_1=2m$ and $m_2=2m$ degrees of freedom and is given by\n", + " \n", + "$$\n", + "p_{f;2m,2m}(x) = {\\Gamma(2m)\\over \\Gamma(m)^2} x^{m-1}(1+x)^{-2m},\n", + "$$\n", + "\n", + "$$\n", + "\\quad {\\rm mean}(x) = {m\\over m-1},\\tag{2.1}\n", + "$$\n", + "\n", + "$$\n", + "\\quad {\\rm var}(x) = {m(2m-1)\\over (m-1)^2 (m-2)}\n", + "$$\n", + " \n", + "with parameter $m = 5$. We can see this empirically by overlaying the distribution onto the histogram with the help of _scipy.stats.f_. The histogram bucket widths are 0.01 so we have to divide by 100:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2vQjpWmJU9jf" + }, + "outputs": [], + "source": [ + "m = 5\n", + "plt.grid()\n", + "plt.plot(x, y, '.', label='data')\n", + "plt.plot(x, f.pdf(x, 2*m, 2*m) / 100, '-r', label='F-dist')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B4p0R-O4r7kO" + }, + "source": [ + "Checking the mean and variance, we get approximate agreement" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N6vkv1M7gpzy" + }, + "outputs": [], + "source": [ + "print(mean, m/(m-1))\n", + "print(variance, m*(2*m-1)/(m-1)**2/(m-2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2g5xTiYMF65a" + }, + "source": [ + "So what is so special about this distribution? When looking for changes between two co-registered Sentinel-1 images acquired at different times, it might seem natural to subtract one from the other and then examine the difference, much as we would do for instance with visual/infrared ground reflectance images. In the case of SAR intensity images this is not a good idea. In the difference of two uncorrelated multilook images $\\langle s_1\\rangle$ and $\\langle s_2\\rangle$ the variances add together and, from Eq. (1.21) in the first part of the Tutorial,\n", + " \n", + "$$\n", + "{\\rm var}(\\langle s_1\\rangle-\\langle s_2\\rangle) = {a_1^2+a_2^2\\over m}, \\tag{2.4}\n", + "$$\n", + " \n", + "where $a_1$ and $a_2$ are mean intensities. So difference pixels in bright areas will have a higher variance than difference pixels in darker areas. It is not possible to set a reliable threshold to determine with a given confidence where change has occurred. \n", + " \n", + "It turns out that the _F_ distributed ratio of the two images which we looked at above is much more informative. For each pixel position in the two images, the quotient $\\langle s_1\\rangle / \\langle s_2\\rangle$ is a _likelihood ratio test statistic_ for deciding whether or not a change has occurred between the two acquisition dates at that position. We will explain what this means below. 
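+ "\n",
+ "Before looking at the full scene, here is a quick client-side sanity check (an optional aside using only _numpy_ and _scipy_, not part of the original analysis): simulate $m$-look intensities for pixels in which nothing has changed and confirm that their ratio behaves like the $F$ distribution of Eq. (2.1):\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "from scipy.stats import f\n",
+ "\n",
+ "rng = np.random.default_rng(42)\n",
+ "m, n = 5, 100000   # number of looks, number of simulated no-change pixels\n",
+ "a = 3.0            # arbitrary (unchanged) mean intensity\n",
+ "\n",
+ "# An m-look intensity is the mean of m single-look exponential draws.\n",
+ "s1 = rng.exponential(a, (n, m)).mean(axis=1)\n",
+ "s2 = rng.exponential(a, (n, m)).mean(axis=1)\n",
+ "q = s1 / s2\n",
+ "\n",
+ "print(q.mean(), m / (m - 1))                            # mean of Eq. (2.1)\n",
+ "print(q.var(), m * (2*m - 1) / ((m - 1)**2 * (m - 2)))  # variance of Eq. (2.1)\n",
+ "print(f.cdf(np.quantile(q, 0.05), 2*m, 2*m))            # should be close to 0.05\n",
+ "```\n",
+ "\n",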
Here for now is the ratio of the two Frankfurt Airport images, this time within the complete _aoi_:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9RYbVUN-G9LR" + }, + "outputs": [], + "source": [ + "im1 = ee.Image(im_list.get(0)).select('VV').clip(aoi)\n", + "im2 = ee.Image(im_list.get(1)).select('VV').clip(aoi)\n", + "ratio = im1.divide(im2)\n", + "\n", + "location = aoi.centroid().coordinates().getInfo()[::-1]\n", + "mp = folium.Map(location=location, zoom_start=12)\n", + "mp.add_ee_layer(ratio,\n", + " {'min': 0, 'max': 20, 'palette': ['black', 'white']}, 'Ratio')\n", + "mp.add_child(folium.LayerControl())\n", + "\n", + "display(mp)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VWFs_0UiK-xs" + }, + "source": [ + "We might guess that the bright pixels here are significant changes, for instance due to aircraft movements on the tarmac or vehicles moving on the highway. Of course ''significant'' doesn't necessarily imply ''interesting''. We already know Frankfurt has a busy airport and that a German Autobahn is always crowded. The question is, how significant are the changes in the statistical sense? Let's now try to answer that question." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8gE7aypYKo0q" + }, + "source": [ + "### Statistical testing\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k3XO4qP3wNUP" + }, + "source": [ + "A _statistical hypothesis_ is a conjecture about the distributions of one or more measured variables. It might, for instance, be an assertion about the mean of a distribution, or about the equivalence of the variances of two different distributions. We distinguish between _simple_ hypotheses, for which the distributions are completely specified, for example: _the mean of a normal distribution with variance $\\sigma^2$ is $\\mu=0$_, and _composite_ hypotheses, for which this is not the case, e.g., _the mean is $\\mu\\ge 0$_.\n", + "\n", + "In order to test such assertions on the basis of measured values, it is also necessary to formulate _alternative_ hypotheses. To distinguish these from the original assertions, the latter are traditionally called _null_ hypotheses. Thus we might be interested in testing the simple null hypothesis $\\mu = 0$ against the composite alternative hypothesis $\\mu\\ne 0$. An appropriate combination of measurements for deciding whether or not to reject the null hypothesis in favor of its alternative is referred to as a _test statistic_, often denoted by the symbol $Q$. An appropriate _test procedure_ will partition the possible test statistics into two subsets: an acceptance region for the null hypothesis and a rejection region. The latter is customarily referred to as the _critical region_.\n", + "\n", + "Referring to the null hypothesis as $H_0$, there are two kinds of errors which can arise from any test procedure:\n", + "\n", + " - $H_0$ may be rejected when in fact it is true. This is called an _error of the first kind_ and the probability that it will occur is denoted $\\alpha$.\n", + " - $H_0$ may be accepted when in fact it is false, which is called an _error of the second kind_ with probability of occurrence $\\beta$.\n", + "\n", + "The probability of obtaining a value of the test statistic within the critical region when $H_0$ is true is thus $\\alpha$. The probability $\\alpha$ is also referred to as the _level of significance_ of the test or the _probability of a false positive_. 
It is generally the case that the lower the value of $\\alpha$, the higher is the probability $\\beta$ of making a second kind error, so there is always a trade-off. (Judge Roy Bean, from the film of the same name, didn't believe in trade-offs. He hanged all defendants regardless of the evidence. His $\\beta$ was zero, but his $\\alpha$ was rather large.)\n", + "\n", + "At any rate, traditionally, significance levels of 0.01 or 0.05 are often used.\n", + "\n", + "#### The _P_ value\n", + "\n", + "Suppose we determine the test statistic to have the value $q$. The _P value_ is defined as the probability of getting a test statistic $Q$ that is at least as extreme as the one observed given the null hypothesis. What is meant by \"extreme\" depends on how we choose the test statistic. If this probability is small, then the null hypothesis is unlikely. If it is smaller than the prescribed significance level $\\alpha$, then the null hypothesis is rejected." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FqqH9A3OtY2f" + }, + "source": [ + "#### Likelihood Functions\n", + "\n", + "The $m$-look VV intensity bands of the two Sentinel-1 images that we took from the archive have pixel values\n", + "\n", + "$$\n", + "\\langle s\\rangle=\\langle|S_{vv}|^2\\rangle, \\quad {\\rm with\\ mean}\\ a=|S^a_{vv}|^2,\n", + "$$\n", + "\n", + "and are _gamma_ distributed according to [Eq. (1.1)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#pixel_distributions), with parameters $\\alpha=m$ and $\\beta = a/m$. To make the notation a bit simpler, let's write $s = \\langle s \\rangle$, so that the multi-look averaging is understood.\n", + "\n", + "Using subscript $i=1,2$ to refer to the two images, the probability densities are\n", + "\n", + "$$\n", + "p(s_i| a_i) = {1 \\over (a_i/m)^m\\Gamma(m)}s_i^{m-1}e^{-s_i m/a_i},\\quad i=1,2. \\tag{2.5}\n", + "$$\n", + "\n", + "We've left out the number of looks $m$ on the left hand side, since it is the same for both images. \n", + "\n", + "Now let's formulate a null hypothesis, namely that no change has taken place in the signal strength $a = |S^a_{vv}|^2$ between the two acquisitions, i.e.,\n", + "\n", + "$$\n", + "H_0: \\quad a_1=a_2 = a\n", + "$$ \n", + "\n", + "and test it against the alternative hypothesis that a change took place\n", + "\n", + "$$\n", + "H_1: \\quad a_1\\ne a_2.\n", + "$$ \n", + "\n", + "If the null hypothesis is true, then the so-called _likelihood_ for getting the measured pixel intensities $s_1$ and $s_2$ is defined as the product of the probability densities for that value of $a$,\n", + "\n", + "$$\n", + "L_0(a) = p(s_1|a)p(s_2|a) = {1\\over(a/m)^{2m}\\Gamma(m)^2}(s_1s_2)^{m-1}e^{-(s_1+s_2)m/a}. \\tag{2.6}\n", + "$$\n", + "\n", + "Taking the product of the probability densities like this is justified by the fact that the measurements $s_1$ and $s_2$ are independent.\n", + "\n", + "The _maximum likelihood_ is obtained by maximizing $L_0(a)$ with respect to $a$,\n", + "\n", + "$$\n", + "L_0(\\hat a) = p(s_1|\\hat a)p(s_2|\\hat a), \\quad \\hat a = \\arg\\max_a L_0(a). 
\n", + "$$\n", + "\n", + "We can get $\\hat a$ simply by solving the equation\n", + "\n", + "$$\n", + "{d L_0(a)\\over da} = 0\n", + "$$\n", + "\n", + "for which we derive the maximum likelihood estimate (an easy exercise)\n", + "\n", + "$$\n", + "\\hat a = {s_1 + s_2 \\over 2}.\n", + "$$\n", + "\n", + "Makes sense: the only information we have is $s_1$ and $s_2$, so, if there was no change, our best estimate of the intensity $a$ is to take the average. Thus, substituting this value into Eq. (2.6), the maximum likelihood under $H_0$ is\n", + "\n", + "$$\n", + "L_0(\\hat a) = {1\\over ((s_1+s_2)/2m)^{2m}\\Gamma(m)^2}(s_1s_2)^{m-1}e^{-2m}. \\tag{2.7}\n", + "$$\n", + "\n", + "Similarly, under the alternative hypothesis $H_1$, the maximum likelihood is\n", + "\n", + "$$\n", + "L_1(\\hat a_1,\\hat a_2) = p(s_1|\\hat a_1)p(s_2|\\hat a_2)\\quad \\hat a_1, \\hat a_2 = \\arg\\max_{a_1,a_2} L_1(a_1,a_2). \n", + "$$\n", + "\n", + "Again, setting derivatives equal to zero, we get for $H_1$\n", + "\n", + "$$\n", + "\\hat a_1 = s_1, \\quad \\hat a_2 = s_2,\n", + "$$\n", + "\n", + "and the maximum likelihood\n", + "\n", + "$$\n", + "L_1(\\hat a_1,\\hat a_2) = {m^{2m}\\over \\Gamma(m)^2}s_1s_2 e^{-2m}. \\tag{2.8}\n", + "$$\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ki2RrEjgTb39" + }, + "source": [ + "### The Likelihood Ratio Test\n", + " \n", + "The theory of statistical testing specifies methods for\n", + "determining the most appropriate test procedure, one which minimizes the probability $\\beta$ of an error of the second kind for a fixed level of significance $\\alpha$. Rather than giving a general definition, we state the appropriate test for our case: \n", + " \n", + "We should reject the null hypothesis if the _ratio_ of the two likelihoods satisfies the inequality\n", + " \n", + "$$\n", + "Q = {L_0(\\hat a)\\over L_1(\\hat a_1,\\hat a_2)} \\le k \\tag{2.9}\n", + "$$\n", + " \n", + "for some appropriately small value of threshold $k$.\n", + " \n", + "This definition simply reflects the fact that, if the null hypothesis is true, the maximum likelihood when $a_1=a_2$ should be close to the maximum likelihood without that restriction, given the measurements $s_1$ and $s_2$. Therefore, if the likelihood ratio is small, (less than or equal to some small value $k$), then $H_0$ should be rejected. \n", + " \n", + "With some (very) simply algebra, Eq. (2.9) evaluates to\n", + " \n", + "$$\n", + "Q = \\left[2^2 \\left( s_1s_2\\over (s_1+s_2)^2\\right)\\right]^m \\le k \\tag{2.10}\n", + "$$\n", + " \n", + "using (2.7) and (2.8). This is the same as saying\n", + " \n", + "$$\n", + "{s_1s_2\\over (s_1+s_2)^2} \\le k'\\quad {\\rm or}\\quad {(s_1+s_2)^2\\over s_1s_2}\\ge k''\\quad {\\rm or}\\quad {s_1\\over s_2}+{s_2\\over s_1}\\ge k''-2\n", + "$$\n", + " \n", + "where $k',k''$ depend on $k$. The last inequality is satisfied if either term is small enough:\n", + " \n", + "$$\n", + "{s_1\\over s_2} \u003c c_1 \\quad {\\rm or}\\quad {s_2\\over s_1} \u003c c_2 \\tag{2.11}\n", + "$$\n", + " \n", + "again for some appropriate threshold $c_1$ and $c_2$ which depend on $k''$. \n", + " \n", + "So the ratio image $s_1/s_2$ that we generated above is indeed a _Likelihood Ratio Test (LRT) statistic_, one of two possible. We'll call it $Q_1 = s_1/s_2$ and the other one $Q_2 = s_2/s_1$. 
The former tests for a significant increase in intensity between times $t_1$ and $t_2$, the latter for a significant decrease.\n", + " \n", + "Fine, but where does the _F_ distribution come in?\n", + " \n", + "Both $s_1$ and $s_2$ are gamma distributed\n", + " \n", + "$$\n", + "p(s\\mid a) = {1\\over (a/m)^m\\Gamma(m)}s^{m-1}e^{-sm/a}.\n", + "$$\n", + " \n", + "Let $z = 2sm/a$. Then\n", + " \n", + "$$\n", + "p(z\\mid a) = p(s\\mid a)\\left |{ds\\over dz}\\right | = {1\\over (a/m)^m\\Gamma(m)}\\left({za\\over 2m}\\right)^{m-1}\\left({a\\over 2m}\\right) = {1\\over 2^m\\Gamma(m)}z^{m-1}e^{-z/2}.\n", + "$$\n", + " \n", + "Comparing this with [Eq. (1.12)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#speckle) from the first part of the Tutorial, we see that $z$ is chi square distributed with $2m$ degrees of freedom, and therefore so are the variables $2s_1m/a$ and $2s_2m/a$. The quotients $s_1/s_2$ and $s_2/s_1$ are thus ratios of two chi square distributed variables with $2m$ degrees of freedom. They therefore have the _F_ distribution of Eq. (2.1).\n", + " \n", + "In order to decide the test for $Q_1$, we need the _P_ value for a measurement $q_1$ of the statistic. Recall that this is the probability of getting a result at least as extreme as the one measured under the null hypothesis. So in this case\n", + " \n", + "$$\n", + "P_1 = {\\rm Prob}(Q_1\\le q_1\\mid H_0), \\tag{2.12}\n", + "$$\n", + " \n", + "which we can calculate from the percentiles of the _F_ distribution, Eq. (2.1). Then if $P_1\\le \\alpha/2$ we reject $H_0$ and conclude with significance $\\alpha/2$ that a change occurred. We do the same test for $Q_2$, so that the combined significance is $\\alpha$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kocUkbzZ3vkQ" + }, + "source": [ + "Now we can make a change map for the Frankfurt Airport for the two acquisitions, August 5 and August 11, 2020. We want to see quite large changes associated primarily with airplane and vehicle movements, so we will set the significance generously low to $\\alpha = 0.001$. We will also distinguish the direction of change and mask out the no-change pixels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DXpGkHFrp4Q3" + }, + "outputs": [], + "source": [ + "# Decision threshold alpha/2:\n", + "dt = f.ppf(0.0005, 2*m, 2*m)\n", + "\n", + "# LRT statistics.\n", + "q1 = im1.divide(im2)\n", + "q2 = im2.divide(im1)\n", + "\n", + "# Change map with 0 = no change, 1 = decrease, 2 = increase in intensity.\n", + "c_map = im1.multiply(0).where(q2.lt(dt), 1)\n", + "c_map = c_map.where(q1.lt(dt), 2)\n", + "\n", + "# Mask no-change pixels.\n", + "c_map = c_map.updateMask(c_map.gt(0))\n", + "\n", + "# Display map with red for increase and blue for decrease in intensity.\n", + "location = aoi.centroid().coordinates().getInfo()[::-1]\n", + "mp = folium.Map(\n", + " location=location, tiles='cartodb positron',\n", + " zoom_start=13)\n", + "folium.TileLayer('OpenStreetMap').add_to(mp)\n", + "mp.add_ee_layer(ratio,\n", + " {'min': 0, 'max': 20, 'palette': ['black', 'white']}, 'Ratio')\n", + "mp.add_ee_layer(c_map,\n", + " {'min': 0, 'max': 2, 'palette': ['black', 'blue', 'red']},\n", + " 'Change Map')\n", + "mp.add_child(folium.LayerControl())\n", + "\n", + "display(mp)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YSfPdRftGdrW" + }, + "source": [ + "Most changes are within the airport or on the Autobahn. 
Barge movements on the Main River (upper left hand corner) are also signaled as significant changes. Note that the 'red' changes (significant increases in intensity) do not show up in the 'ratio' overlay, which displays $s_1/s_2$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mYnuTu4rpcn" + }, + "source": [ + "### Bivariate change detection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mCZbWa7gEaHH" + }, + "source": [ + "Rather than analyzing the VV and VH bands individually, it would make more sense to treat them together, and that is what we will now do. It is convenient to work with the covariance matrix form for measured intensities that we introduce in Part 1, see [Eq.(1.6a)](https://developers.google.com/earth-engine/tutorials/community/detecting-changes-in-sentinel-1-imagery-pt-1#single_look_complex_slc_sar_measurements). Again with the aim of keeping the notation simple, define\n", + "\n", + "$$\n", + "\\pmatrix{ s_i \u0026 0\\cr 0 \u0026 r_i} = \\pmatrix{\\langle|S_{vv}|^2\\rangle_i \u0026 0 \\cr 0 \u0026 \\langle|S_{vh}|^2\\rangle_i}, \\quad {\\rm with\\ means}\\quad a_i = \\langle|S^{a_i}_{vv}|^2\\rangle, \\quad b_i = \\langle|S^{b_i}_{vh}|^2\\rangle \\tag{2.13}\n", + "$$\n", + "\n", + "for the two acquisition times $t_i,\\ i=1,2$. \n", + "\n", + "Under $H_0$ we have $a_1=a_2=a$ and $b_1=b_2=b$. Assuming independence of $s_i$ and $r_i$, the likelihood function is the product of the four gamma distributions\n", + "\n", + "$$\n", + "L_0(a,b) = p(s_1\\mid a)p(r_1\\mid b)p(s_2\\mid a)p(r_2\\mid b).\n", + "$$\n", + "\n", + "Under $H_1$,\n", + "\n", + "$$\n", + "L_1(a_1,b_1,a_2,b_2) = p(s_1\\mid a_1)p(r_1\\mid b_1)p(s_2\\mid a_2)p(r_2\\mid b_2).\n", + "$$\n", + "\n", + "With maximum likelihood estimates under $H_0$ \n", + "\n", + "$$\n", + "\\hat a = (s_1+s_2)/2\\quad {\\rm and}\\quad \\hat b = (r_1+r_2)/2\n", + "$$ \n", + "\n", + "for the parameters and some simple algebra, we get \n", + "\n", + "$$\n", + "L_0(\\hat a,\\hat b) = {(2m)^{4m}\\over (s_1+s_2)^{2m}(r_1+r_2)^{2m}\\Gamma(m)^4}s_1r_1s_2r_2e^{-4m}. \\tag{2.14}\n", + "$$ \n", + "\n", + "Similarly with $\\hat a_1=s_1,\\ \\hat b_1=r_1,\\ \\hat a_2=s_2,\\ \\hat b_2=r_2$, we calculate\n", + "\n", + "$$\n", + "L_1(\\hat a_1,\\hat b_1,\\hat a_2,\\hat b_2) = {m^{4m}\\over s_1r_1s_2r_2}e^{-4m}.\n", + "$$\n", + "\n", + "The likelihood test statistic in then\n", + "\n", + "$$\n", + "Q = {L_0(\\hat a,\\hat b)\\over L_1(\\hat a_1,\\hat b_1,\\hat a_2,\\hat b_2)}={2^4(s_1r_1s_2r_2)^m\\over (s_1+s_2)^{2m}(r_1+r_2)^{2m}}.\n", + "$$\n", + "\n", + "Writing this in terms of the covariance matrix representation,\n", + "\n", + "$$\n", + "c_i = \\pmatrix{s_i \u0026 0\\cr 0 \u0026 r_i},\\quad i=1,2,\n", + "$$\n", + "\n", + "we derive, finally, the likelihood ratio test\n", + "\n", + "$$\n", + "Q = \\left[2^4\\pmatrix{|c_1| |c_2|\\over |c_1+c_2|^2 }\\right]^m \\le k, \\tag{2.15}\n", + "$$\n", + "\n", + "where $|\\cdot|$ indicates the matrix determinant, $|c_i|=s_ir_i$. \n", + "\n", + "So far so good. But in order to determine _P_ values, we need the probability distribution of $Q$. This time we have no idea how to obtain it. Here again, statistical theory comes to our rescue.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hWU-gu4W3p-_" + }, + "source": [ + "Let $\\Theta$ be the parameter space for the LRT. In our example it is \n", + "$$\n", + "\\Theta = \\{ a_1,b_1,a_2,b_2\\}\n", + "$$ \n", + "and has $d=4$ dimensions. 
Under the null hypothesis the parameter space is restricted by the conditions $a=a_1=a_2$ and $b=b_1=b_2$ to \n", + "$$\n", + "\\Theta_0 = \\{ a,b\\}\n", + "$$ \n", + "with $d_0=2$ dimensions. According to [Wilks' Theorem](https://en.wikipedia.org/wiki/Wilks%27_theorem), as the number of measurements determining the LRT statistic $Q$ approaches $\\infty$, the test statistic $-2\\log Q$ approaches a chi square distribution with $d-d_0=2$ degrees of freedom. (Recall that, in order to determine the matrices $c_1$ and $c_2$, five individual measurements were averaged or multi-looked.) So rather than working with $Q$ directly, we use $-2\\log Q$ instead and hope that Wilk's theorem is a good enough approximation for our case.\n", + "\n", + "In order to check if this is so, we just have to program \n", + "\n", + "$$\n", + "-2\\log Q = (\\log{|c_1|}+\\log{|c_2|}-2\\log{|c_1+c_2|}+4\\log{2})(-2m)\n", + "$$ \n", + "\n", + "in GEE-ese:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5HKdnWr8YU1U" + }, + "outputs": [], + "source": [ + "def det(im):\n", + " return im.expression('b(0) * b(1)')\n", + "\n", + "# Number of looks.\n", + "m = 5\n", + "\n", + "im1 = ee.Image(im_list.get(0)).select('VV', 'VH').clip(aoi)\n", + "im2 = ee.Image(im_list.get(1)).select('VV', 'VH').clip(aoi)\n", + "\n", + "m2logQ = det(im1).log().add(det(im2).log()).subtract(\n", + " det(im1.add(im2)).log().multiply(2)).add(4*np.log(2)).multiply(-2*m)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wmprc_di-PBz" + }, + "source": [ + "and then plot its histogram, comparing it with the chi square distribution _scipy.stats.chi2.pdf()_ with two degrees of freedom:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EVAjH_uR_kkg" + }, + "outputs": [], + "source": [ + "hist = m2logQ.reduceRegion(\n", + " ee.Reducer.fixedHistogram(0, 20, 200), aoi).get('VV').getInfo()\n", + "a = np.array(hist)\n", + "x = a[:, 0]\n", + "y = a[:, 1] / np.sum(a[:, 1])\n", + "plt.plot(x, y, '.', label='data')\n", + "plt.plot(x, chi2.pdf(x, 2)/10, '-r', label='chi square')\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KvOXWaRhCQ8G" + }, + "source": [ + "Looks pretty good. Note now that a small value of the LRT $Q$ in Eq. (2.15) corresponds to a large value of $-2\\log{Q}$. Therefore the _P_ value for a measurement $q$ is now the probability of getting the value $-2\\log{q}$\n", + "or higher,\n", + "$$\n", + "P = {\\rm Prob}(-2\\log{Q} \\ge -2\\log{q}) = 1 - {\\rm Prob}(-2\\log{Q} \u003c -2\\log{q}).\n", + "$$\n", + "\n", + "So let's try out our bivariate change detection procedure, this time on an agricultural scene where we expect to see larger regions of change." 
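+ "\n",
+ "Before moving to that scene, a small client-side simulation (an optional aside, not part of the original analysis) can reassure us that the chi square approximation of Wilks' theorem is reasonable for $m=5$ looks: generate unchanged dual-pol pixels, compute $-2\log Q$, and compare a few empirical quantiles with those of the chi square distribution with two degrees of freedom. The mean intensities below are arbitrary:\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "from scipy.stats import chi2\n",
+ "\n",
+ "rng = np.random.default_rng(1)\n",
+ "m, n = 5, 200000\n",
+ "a, b = 0.3, 0.06   # unchanged mean VV and VH intensities (arbitrary values)\n",
+ "\n",
+ "def mlook(mean, size):\n",
+ "    # m-look intensity: mean of m single-look exponential draws.\n",
+ "    return rng.exponential(mean, (size, m)).mean(axis=1)\n",
+ "\n",
+ "s1, s2 = mlook(a, n), mlook(a, n)   # VV at t1 and t2, no change\n",
+ "r1, r2 = mlook(b, n), mlook(b, n)   # VH at t1 and t2, no change\n",
+ "\n",
+ "m2logq = (np.log(s1*r1) + np.log(s2*r2)\n",
+ "          - 2*np.log((s1 + s2)*(r1 + r2)) + 4*np.log(2)) * (-2*m)\n",
+ "\n",
+ "# Empirical quantiles vs. the chi square(2) approximation (rough agreement expected).\n",
+ "for p in (0.9, 0.95, 0.99):\n",
+ "    print(p, np.quantile(m2logq, p), chi2.ppf(p, df=2))\n",
+ "```\n",
+ "\n",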
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UHYIAceXKNjG" + }, + "outputs": [], + "source": [ + "geoJSON ={\n", + " \"type\": \"FeatureCollection\",\n", + " \"features\": [\n", + " {\n", + " \"type\": \"Feature\",\n", + " \"properties\": {},\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [\n", + " -98.2122802734375,\n", + " 49.769291532628515\n", + " ],\n", + " [\n", + " -98.00559997558594,\n", + " 49.769291532628515\n", + " ],\n", + " [\n", + " -98.00559997558594,\n", + " 49.88578690918283\n", + " ],\n", + " [\n", + " -98.2122802734375,\n", + " 49.88578690918283\n", + " ],\n", + " [\n", + " -98.2122802734375,\n", + " 49.769291532628515\n", + " ]\n", + " ]\n", + " ]\n", + " }\n", + " }\n", + " ]\n", + "}\n", + "coords = geoJSON['features'][0]['geometry']['coordinates']\n", + "aoi1 = ee.Geometry.Polygon(coords)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IpOZqWqUROoG" + }, + "source": [ + "This is a mixed agricultural/forest area in southern Manitoba, Canada. We'll gather two images, one from the beginning of August and one from the beginning of September, 2018. A lot of harvesting takes place in this interval, so we expect some extensive changes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DmQfIKvmLdCc" + }, + "outputs": [], + "source": [ + "im1 = ee.Image(ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT')\n", + " .filterBounds(aoi1)\n", + " .filterDate(ee.Date('2018-08-01'), ee.Date('2018-08-31'))\n", + " .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\n", + " .filter(ee.Filter.eq('relativeOrbitNumber_start', 136))\n", + " .first()\n", + " .clip(aoi1))\n", + "im2 = ee.Image(ee.ImageCollection('COPERNICUS/S1_GRD_FLOAT').filterBounds(aoi1)\n", + " .filterDate(ee.Date('2018-09-01'), ee.Date('2018-09-30'))\n", + " .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))\n", + " .filter(ee.Filter.eq('relativeOrbitNumber_start', 136))\n", + " .first()\n", + " .clip(aoi1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qaeCqVQhEe9F" + }, + "source": [ + "Here are the acquisition times:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EZjsAxhKKmZs" + }, + "outputs": [], + "source": [ + "acq_time = im1.get('system:time_start').getInfo()\n", + "print( time.strftime('%x', time.gmtime(acq_time/1000)) )\n", + "acq_time = im2.get('system:time_start').getInfo()\n", + "print( time.strftime('%x', time.gmtime(acq_time/1000)) )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5T9VFW1hSZMR" + }, + "source": [ + "Fortunately it is possible to map the chi square cumulative distribution function over an _ee.Image()_ so that a _P_ value image can be calculated directly. This wasn't possible in the single band case, as the _F_ cumulative distribution is not available on the GEE. 
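+ "\n",
+ "For orientation, here is how the same _P_ value could be computed client-side with _scipy_ for a single pixel; the intensity values are invented purely for illustration, and the server-side helper defined below achieves the same thing with the built-in incomplete gamma function:\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "from scipy.stats import chi2\n",
+ "\n",
+ "m = 5\n",
+ "c1 = np.array([0.25, 0.05])   # hypothetical (VV, VH) intensities at t1\n",
+ "c2 = np.array([0.45, 0.08])   # hypothetical (VV, VH) intensities at t2\n",
+ "\n",
+ "def det(c):\n",
+ "    # Determinant of a diagonal 2x2 covariance matrix stored as its diagonal.\n",
+ "    return c[0] * c[1]\n",
+ "\n",
+ "m2logq = (np.log(det(c1)) + np.log(det(c2))\n",
+ "          - 2*np.log(det(c1 + c2)) + 4*np.log(2)) * (-2*m)\n",
+ "\n",
+ "p_value = 1 - chi2.cdf(m2logq, df=2)\n",
+ "print(m2logq, p_value)   # conclude change if p_value falls below the chosen alpha\n",
+ "```\n",
+ "\n",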
Here are the _P_ values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kmU0Qt8GK8CB" + }, + "outputs": [], + "source": [ + "def chi2cdf(chi2, df):\n", + " ''' Chi square cumulative distribution function for df degrees of freedom\n", + " using the built-in incomplete gamma function gammainc() '''\n", + " return ee.Image(chi2.divide(2)).gammainc(ee.Number(df).divide(2))\n", + "\n", + "# The observed test statistic image -2logq.\n", + "m2logq = det(im1).log().add(det(im2).log()).subtract(\n", + " det(im1.add(im2)).log().multiply(2)).add(4*np.log(2)).multiply(-2*m)\n", + "\n", + "# The P value image prob(m2logQ \u003e m2logq) = 1 - prob(m2logQ \u003c m2logq).\n", + "p_value = ee.Image.constant(1).subtract(chi2cdf(m2logq, 2))\n", + "\n", + "# Project onto map.\n", + "location = aoi1.centroid().coordinates().getInfo()[::-1]\n", + "mp = folium.Map(location=location, zoom_start=12)\n", + "mp.add_ee_layer(p_value,\n", + " {'min': 0,'max': 1, 'palette': ['black', 'white']}, 'P-value')\n", + "mp.add_child(folium.LayerControl())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZLQlE1IlM6Dj" + }, + "source": [ + "The uniformly dark areas correspond to small or vanishing _P_ values and signify change. The bright areas correspond to no change. Why they are not uniformly bright will be explained below. Now we set a significance threshold of $\\alpha=0.01$ and display the significant changes, whereby 1% of them will be false positives. For reference we also show the 2018 [Canada AAFC Annual Crop Inventory](https://developers.google.com/earth-engine/datasets/catalog/AAFC_ACI) map, which is available as a GEE collection:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lInJMXCyTlaF" + }, + "outputs": [], + "source": [ + "c_map = p_value.multiply(0).where(p_value.lt(0.01), 1)\n", + "\n", + "crop2018 = (ee.ImageCollection('AAFC/ACI')\n", + " .filter(ee.Filter.date('2018-01-01', '2018-12-01'))\n", + " .first()\n", + " .clip(aoi1))\n", + "\n", + "mp = folium.Map(location=location, zoom_start=12)\n", + "mp.add_ee_layer(crop2018, {min: 0, max: 255}, 'crop2018')\n", + "mp.add_ee_layer(c_map.updateMask(\n", + " c_map.gt(0)), {'min': 0, 'max': 1, 'palette': ['black', 'red']}, 'c_map')\n", + "mp.add_child(folium.LayerControl())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vATDsxFOOESB" + }, + "source": [ + " The major crops in the scene are soybeans (dark brown), oats (light brown), canola (light green), corn (light yellow) and winter wheat (dark gray). The wooded areas exhibit little change, while canola has evidently been extensively harvested in the interval." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "COkFrH11bpNI" + }, + "source": [ + "#### A note on _P_ values\n", + "Because small _P_ values are indicative of change, it is tempting to say that, the larger the _P_ value, the higher the probability of no change. Or more explicitly, the _P_ value is itself the no change probability. Let's see why this is false. Below we choose a wooded area of the agricultural scene where few significant changes are to be expected and use it to subset the _P_ value image. 
Then we plot the histogram of the subset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "761UOg0UCEmQ" + }, + "outputs": [], + "source": [ + "geoJSON ={\n", + " \"type\": \"FeatureCollection\",\n", + " \"features\": [\n", + " {\n", + " \"type\": \"Feature\",\n", + " \"properties\": {},\n", + " \"geometry\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " [\n", + " -98.18550109863281,\n", + " 49.769735012247885\n", + " ],\n", + " [\n", + " -98.13949584960938,\n", + " 49.769735012247885\n", + " ],\n", + " [\n", + " -98.13949584960938,\n", + " 49.798109268622\n", + " ],\n", + " [\n", + " -98.18550109863281,\n", + " 49.798109268622\n", + " ],\n", + " [\n", + " -98.18550109863281,\n", + " 49.769735012247885\n", + " ]\n", + " ]\n", + " ]\n", + " }\n", + " }\n", + " ]\n", + "}\n", + "coords = geoJSON['features'][0]['geometry']['coordinates']\n", + "aoi1_sub = ee.Geometry.Polygon(coords)\n", + "hist = p_value.reduceRegion(ee.Reducer.fixedHistogram(0, 1, 100), aoi1_sub).get('constant').getInfo()\n", + "a = np.array(hist)\n", + "x = a[:,0]\n", + "y = a[:,1]/np.sum(a[:,1])\n", + "plt.plot(x, y, '.b', label='p-value')\n", + "plt.ylim(0, 0.05)\n", + "plt.grid()\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pz4ipvMKPmxK" + }, + "source": [ + "So the P values of no-change measurements are uniformly distributed over $[0, 1]$ (the excess of small _P_ values at the left can be ascribed to genuine changes within the polygon). A large _P_ value is no more indicative of no change than a small one. Of course it has to be this way. When, for example, we set a significance level of 5%, then the fraction of false positives, i.e., the fraction of _P_ values smaller than 0.05 given $H_0$, must also be 5%. This accounts for the noisy appearance of the _P_ value image in the no-change regions." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-LnZpAKQcRz5" + }, + "source": [ + "#### Change direction: the Loewner order\n", + "What about the direction of change in the bivariate case? This is less clear, as we can have the situation where the VV intensity gets larger and the VH smaller from time $t_1$ to $t_2$, or vice versa. When we are dealing with the C2 covariance matrix representation of SAR imagery, see Eq. (2.13), a characterization of change can be made as follows [(Nielsen et al. (2019))](https://ieeexplore.ieee.org/document/8736751): For each significantly changed pixel, we determine the difference $C2_{t_2}-C2_{t_1}$ and examine its so-called _definiteness_, also known as the _Loewner order_ of the change. A matrix is said to be _positive definite_ if all of its eigenvalues are positive, _negative definite_ if they are all negative, otherwise _indefinite_. In the case of the $2\\times 2$ diagonal matrices that we are concerned with the eigenvalues are just the two diagonal elements themselves, so determining the Loewner order is trivial. 
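+ "\n",
+ "In plain Python the bookkeeping for a single pixel is just a sign check on the diagonal of the difference matrix; a toy sketch with invented numbers (not part of the original analysis):\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "\n",
+ "def loewner(d):\n",
+ "    # d holds the diagonal of the difference matrix C2(t2) - C2(t1).\n",
+ "    if np.all(d > 0):\n",
+ "        return 'positive definite'   # both VV and VH intensities increased\n",
+ "    if np.all(d < 0):\n",
+ "        return 'negative definite'   # both VV and VH intensities decreased\n",
+ "    return 'indefinite'              # the two bands changed in opposite directions\n",
+ "\n",
+ "print(loewner(np.array([0.12, 0.03])))    # increase in both bands\n",
+ "print(loewner(np.array([-0.05, 0.02])))   # mixed change\n",
+ "```\n",
+ "\n",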
For full $2\\times 2$ dual pol or $3\\times 3$ quad pol SAR imagery, devising an efficient way to determine the Loewner order is more difficult, see [Nielsen (2019)](https://ieeexplore.ieee.org/document/8913617).\n", + "\n", + "So let's include the Loewner order in our change map:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FOL0V1DNsCqH" + }, + "outputs": [], + "source": [ + "c_map = p_value.multiply(0).where(p_value.lt(0.01), 1)\n", + "diff = im2.subtract(im1)\n", + "d_map = c_map.multiply(0) # Initialize the direction map to zero.\n", + "d_map = d_map.where(det(diff).gt(0), 2) # All pos or neg def diffs are now labeled 2.\n", + "d_map = d_map.where(diff.select(0).gt(0), 3) # Re-label pos def (and label some indef) to 3.\n", + "d_map = d_map.where(det(diff).lt(0), 1) # Label all indef to 1.\n", + "c_map = c_map.multiply(d_map) # Re-label the c_map, 0*X = 0, 1*1 = 1, 1*2= 2, 1*3 = 3." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MVh9eU6V3bVF" + }, + "source": [ + "Now we display the changes, with positive definite red, negative definite blue, and indefinite yellow:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b8_ggucVvOuh" + }, + "outputs": [], + "source": [ + "mp = folium.Map(location=location, zoom_start=12)\n", + "mp.add_ee_layer(crop2018, {min: 0, max: 255}, 'crop2018')\n", + "mp.add_ee_layer(\n", + " c_map.updateMask(c_map.gt(0)), {\n", + " 'min': 0,\n", + " 'max': 3,\n", + " 'palette': ['black', 'yellow', 'blue', 'red']\n", + " }, 'c_map')\n", + "mp.add_child(folium.LayerControl())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ghLYSYeLSj0P" + }, + "source": [ + "The more or less compact blue changes indicate a decrease in reflectivity in both VV and VH bands, and correspond to crop harvesting (especially canola).\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RvQO9S0W8UEQ" + }, + "source": [ + "### Outlook\n", + "We have now covered the subject of bitemporal change detection with GEE Sentinel-1 imagery. The beauty of GEE is that it is trivially easy to gather arbitrarily long time series of S1 images from the archive, all with revisit times of 6 or 12 days depending on whether one or both satellites are collecting data. The next part of the Tutorial will generalize the techniques we have learned so far to treat multitemporal change detection.\n", + "\n", + "### Oh, and one more thing ...\n", + "\n", + "We didn't mention it above, but note the similarity between Eq. (2.10) and Eq. (2.15). To go from the monovariate LRT to the bivariate LRT, we simply replace the product of intensities $s_1s_2$ by the product of determinants $|c_1||c_2|$, the sum $s_1+s_2$ by $|c_1+c_2|$ and the factor $2^{2}$ by $2^4=2^{2\\cdot2}$. This observation will come in handy in Part 3." 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Detecting Changes in Sentinel-1 Imagery (Part 2)", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 } diff --git a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-3/index.ipynb b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-3/index.ipynb index 1f1adc295..a8e0e7f1a 100644 --- a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-3/index.ipynb +++ b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-3/index.ipynb @@ -71,7 +71,7 @@ "ee.Authenticate()\n", "\n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -157,7 +157,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", " folium.raster_layers.TileLayer(\n", " tiles = map_id_dict['tile_fetcher'].url_format,\n", - " attr = 'Map Data © Google Earth Engine',\n", + " attr = 'Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " name = name,\n", " overlay = True,\n", " control = True).add_to(self)\n", @@ -668,8 +668,8 @@ "\n", "$$\n", "\\begin{align*}\n", - "H_0:\\ & a_1=a_2= a_3\\ (=a)\\cr\n", - "{\\rm against}\\quad H_1:\\ &a_1=a_2\\ (=a) \\ne a_3.\n", + "H_0:\\ \u0026 a_1=a_2= a_3\\ (=a)\\cr\n", + "{\\rm against}\\quad H_1:\\ \u0026a_1=a_2\\ (=a) \\ne a_3.\n", "\\end{align*}\n", "$$\n", "\n", @@ -677,12 +677,12 @@ "\n", "$$\n", "\\begin{align*}\n", - " {\\rm From\\ Eq.}\\ (3.4):\\ &L_0(a) = {1\\over\\Gamma(m)^3} \\left[{a\\over m}\\right]^{-3m}\\left[s_1s_2s_3\\right]^{m-1}\\exp(-{m\\over a}(s_1+s_2+s_3) \\cr\n", - " &\\hat a = {1\\over 3}(s_1+s_2+s_3) \\cr\n", - "=>\\ &L_0(\\hat a) = {1\\over\\Gamma(m)^3} \\left[{s_1+s_2+s_3\\over 3m}\\right]^{-3m}\\left[s_1s_2s_3\\right]^{m-1} \\exp(-3m) \\cr\n", - "{\\rm From\\ Eq.}\\ (3.3):\\ &L_1(a_1,a_2,a_3) = {1\\over\\Gamma(m)^3}\\left[a_1a_2a_3\\over m\\right]^{-m}[s_1s_2s_3]^{m-1}\\exp(-m(s_1/a_1+s_2/a_2+s_3/a_3)\\cr\n", - "&\\hat a_1 = \\hat a_2 = {1\\over 2}(s_1+s_2),\\quad \\hat a_3 = s_3 \\cr\n", - "=>\\ &L_1(\\hat a_1,\\hat a_2, \\hat a_3) = {1\\over\\Gamma(m)^3}\\left[(s_1+s_2)^2s_3\\over 2^2m \\right]^{-m}[s_1s_2s_3]^{m-1}\\exp(-3m)\n", + " {\\rm From\\ Eq.}\\ (3.4):\\ \u0026L_0(a) = {1\\over\\Gamma(m)^3} \\left[{a\\over m}\\right]^{-3m}\\left[s_1s_2s_3\\right]^{m-1}\\exp(-{m\\over a}(s_1+s_2+s_3) \\cr\n", + " \u0026\\hat a = {1\\over 3}(s_1+s_2+s_3) \\cr\n", + "=\u003e\\ \u0026L_0(\\hat a) = {1\\over\\Gamma(m)^3} \\left[{s_1+s_2+s_3\\over 3m}\\right]^{-3m}\\left[s_1s_2s_3\\right]^{m-1} \\exp(-3m) \\cr\n", + "{\\rm From\\ Eq.}\\ (3.3):\\ \u0026L_1(a_1,a_2,a_3) = {1\\over\\Gamma(m)^3}\\left[a_1a_2a_3\\over m\\right]^{-m}[s_1s_2s_3]^{m-1}\\exp(-m(s_1/a_1+s_2/a_2+s_3/a_3)\\cr\n", + "\u0026\\hat a_1 = \\hat a_2 = {1\\over 2}(s_1+s_2),\\quad \\hat a_3 = s_3 \\cr\n", + "=\u003e\\ \u0026L_1(\\hat a_1,\\hat a_2, \\hat a_3) = {1\\over\\Gamma(m)^3}\\left[(s_1+s_2)^2s_3\\over 2^2m \\right]^{-m}[s_1s_2s_3]^{m-1}\\exp(-3m)\n", "\\end{align*}\n", "$$\n", "\n", @@ -1105,8 +1105,8 @@ "\n", "$$\n", "\\begin{align*}\n", - "\\bar c_i &= \\bar c_{i-1} + (c_i - \\bar c_{i-1})/i \\cr\n", - 
"\\bar c_1 &= c_1.\n", + "\\bar c_i \u0026= \\bar c_{i-1} + (c_i - \\bar c_{i-1})/i \\cr\n", + "\\bar c_1 \u0026= c_1.\n", "\\end{align*}\n", "$$\n", "\n", @@ -1282,4 +1282,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-4/index.md b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-4/index.md index 7280ee91e..b103f9d41 100644 --- a/tutorials/detecting-changes-in-sentinel-1-imagery-pt-4/index.md +++ b/tutorials/detecting-changes-in-sentinel-1-imagery-pt-4/index.md @@ -46,7 +46,7 @@ import ee # Trigger the authentication flow. ee.Authenticate() # Initialize the library. -ee.Initialize() +ee.Initialize(project='my-project') ``` Enable the Widget manager. diff --git a/tutorials/groundwater-recharge-estimation/index.ipynb b/tutorials/groundwater-recharge-estimation/index.ipynb index f5f40ac1b..56706b08b 100644 --- a/tutorials/groundwater-recharge-estimation/index.ipynb +++ b/tutorials/groundwater-recharge-estimation/index.ipynb @@ -77,7 +77,7 @@ "ee.Authenticate()\n", "\n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ] }, { @@ -169,7 +169,7 @@ "- the *wilting point* represents the point below what water cannot be extracted by plant roots,\n", "- the *field capacity* represents the point after which water cannot be stored by soil any more. After that point, gravitational forces become too high and water starts to infiltrate the lower levels.\n", "\n", - "Some equations given by Saxton & Rawls (2006) are used to link both parameters to the texture of the soil. The calculation of water content at wilting point $θ_{WP}$ can be done as follows:\n", + "Some equations given by Saxton \u0026 Rawls (2006) are used to link both parameters to the texture of the soil. The calculation of water content at wilting point $θ_{WP}$ can be done as follows:\n", "\n", "$$\\theta_{WP}= \\theta_{1500t} + (0.14 \\theta_{1500t} - 0.002)$$ with:\n", "\n", @@ -195,7 +195,7 @@ "source": [ "#### Determination of soil texture and properties\n", "In the following, [OpenLandMap datasets](https://developers.google.com/earth-engine/datasets/tags/opengeohub) are used to describe clay, sand and organic carbon content of soil.\n", - "A global dataset of soil water content at the field capacity with a resolution of 250 m has been made available by Hengl & Gupta (2019). However, up to now, there is no dataset dedicated to the water content of soil at the wilting point. Consequently, in the following, both parameters will be determined considering the previous equations and using the global datasets giving the sand, clay and organic matter contents of the soil. According to the [description](https://developers.google.com/earth-engine/datasets/catalog/OpenLandMap_SOL_SOL_CLAY-WFRACTION_USDA-3A1A1A_M_v02#description), these datasets are based on machine learning predictions from global compilation of soil profiles and samples. Processing steps are described in detail [here](https://gitlab.com/openlandmap/global-layers/tree/master/soil). The information (clay, sand content, etc.) is given at 6 standard depths (0, 10, 30, 60, 100 and 200 cm) at 250 m resolution. \n", + "A global dataset of soil water content at the field capacity with a resolution of 250 m has been made available by Hengl \u0026 Gupta (2019). However, up to now, there is no dataset dedicated to the water content of soil at the wilting point. 
Consequently, in the following, both parameters will be determined considering the previous equations and using the global datasets giving the sand, clay and organic matter contents of the soil. According to the [description](https://developers.google.com/earth-engine/datasets/catalog/OpenLandMap_SOL_SOL_CLAY-WFRACTION_USDA-3A1A1A_M_v02#description), these datasets are based on machine learning predictions from global compilation of soil profiles and samples. Processing steps are described in detail [here](https://gitlab.com/openlandmap/global-layers/tree/master/soil). The information (clay, sand content, etc.) is given at 6 standard depths (0, 10, 30, 60, 100 and 200 cm) at 250 m resolution. \n", "\n", "These standard depths and associated bands are defined into a list as follows:" ] @@ -312,7 +312,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", " folium.raster_layers.TileLayer(\n", " tiles=map_id_dict[\"tile_fetcher\"].url_format,\n", - " attr=\"Map Data © Google Earth Engine\",\n", + " attr=\"Map Data \u0026copy; \u003ca href='https://earthengine.google.com/'\u003eGoogle Earth Engine\u003c/a\u003e\",\n", " name=name,\n", " overlay=True,\n", " control=True,\n", @@ -497,7 +497,7 @@ "source": [ "#### Expression to calculate hydraulic properties\n", "\n", - "Now that soil properties are described, the water content at the field capacity and at the wilting point can be calculated according to the equation defined at the beginning of this section. Please note that in the equation of Saxton & Rawls (2006), the wilting point and field capacity are calculated using the Organic Matter content ($OM$) and not the Organic Carbon content ($OC$). In the following, we convert $OC$ into $OM$ using the corrective factor known as the [Van Bemmelen factor](https://inspire.ec.europa.eu/codelist/SoilDerivedObjectParameterNameValue/organicCarbonContent):\n", + "Now that soil properties are described, the water content at the field capacity and at the wilting point can be calculated according to the equation defined at the beginning of this section. Please note that in the equation of Saxton \u0026 Rawls (2006), the wilting point and field capacity are calculated using the Organic Matter content ($OM$) and not the Organic Carbon content ($OC$). 
In the following, we convert $OC$ into $OM$ using the corrective factor known as the [Van Bemmelen factor](https://inspire.ec.europa.eu/codelist/SoilDerivedObjectParameterNameValue/organicCarbonContent):\n", "\n", "$$0M = 1.724 \\times OC$$\n", "\n", @@ -1188,7 +1188,7 @@ "\n", "**Case 1: potential evapotranspiration is higher than precipitation.**\n", "\n", - "In that case, $PET>P$ and $APWL_{m}$ is incremented as follows:\n", + "In that case, $PET\u003eP$ and $APWL_{m}$ is incremented as follows:\n", "$APWL_{m} = APWL_{m - 1} + (PET_{m} - P_{m})$ where:\n", "- $APWL_{m}$ (respectively $APWL_{m - 1}$) represents the accumulated potential water loss for the month $m$ (respectively at the previous month $m - 1$)\n", "- $PET_{m}$ the cumulative potential evapotranspiration at month $m$,\n", @@ -1199,19 +1199,19 @@ "\n", "**Case 2: potential evapotranspiration is lower than precipitation.**\n", "\n", - "In that case, $PET ST_{FC}$ the recharge is calculated as:\n", + "If $ST_{m} \u003e ST_{FC}$ the recharge is calculated as:\n", "$R_{m} = ST_{m} - ST_{FC} + P_{m} - PET_{m}$\n", "\n", "In addition, the water stored at the end of the month $m$ becomes equal to $ST_{FC}$ and $APWL_{m}$ is set equal to zero.\n", "\n", "***Case 2.2: the storage $ST_{m}$ is less than or equal to the water stored at the field capacity.***\n", "\n", - "If $ST_{m} <= ST_{FC}$, $APWL_{m}$ is implemented as follows:\n", + "If $ST_{m} \u003c= ST_{FC}$, $APWL_{m}$ is implemented as follows:\n", "$APWL_{m} = ST_{FC} \\times \\textrm{ln}(ST_{m}/ST_{FC})$, and no percolation occurs.\n", "\n", "#### Initialization\n", @@ -1372,7 +1372,7 @@ " # logical operations.\n", "\n", " # CASE 1.\n", - " # Define zone1: the area where PET > P.\n", + " # Define zone1: the area where PET \u003e P.\n", " zone1 = pet_im.gt(pr_im)\n", "\n", " # Calculation of APWL in zone 1.\n", @@ -1388,7 +1388,7 @@ " new_st = new_st.where(zone1, zone1_st)\n", "\n", " # CASE 2.\n", - " # Define zone2: the area where PET <= P.\n", + " # Define zone2: the area where PET \u003c= P.\n", " zone2 = pet_im.lte(pr_im)\n", "\n", " # Calculate ST in zone 2.\n", @@ -1397,7 +1397,7 @@ " new_st = new_st.where(zone2, zone2_st)\n", "\n", " # CASE 2.1.\n", - " # Define zone21: the area where PET <= P and ST >= STfc.\n", + " # Define zone21: the area where PET \u003c= P and ST \u003e= STfc.\n", " zone21 = zone2.And(zone2_st.gte(stfc))\n", "\n", " # Calculate recharge in zone 21.\n", @@ -1408,7 +1408,7 @@ " new_st = new_st.where(zone21, stfc)\n", "\n", " # CASE 2.2.\n", - " # Define zone 22: the area where PET <= P and ST < STfc.\n", + " # Define zone 22: the area where PET \u003c= P and ST \u003c STfc.\n", " zone22 = zone2.And(zone2_st.lt(stfc))\n", "\n", " # Calculate APWL in zone 22.\n", @@ -1874,13 +1874,13 @@ "\n", "**Allen RG, Pereira LS, Raes D, Smith M (1998).** Crop evapotranspiration: guidelines for computing crop water requirements. *Irrigation and Drainage Paper 56*, FAO, Rome.\n", "\n", - "**Saxton, K. E., & Rawls, W. J. (2006).** Soil water characteristic estimates by texture and organic matter for hydrologic solutions. *Soil science society of America Journal*, 70(5), 1569-1578.\n", + "**Saxton, K. E., \u0026 Rawls, W. J. (2006).** Soil water characteristic estimates by texture and organic matter for hydrologic solutions. *Soil science society of America Journal*, 70(5), 1569-1578.\n", "\n", - "**Steenhuis, T. S., & Van der Molen, W. H. (1986).** The Thornthwaite-Mather procedure as a simple engineering method to predict recharge. 
*Journal of Hydrology*, 84(3-4), 221-229.\n", + "**Steenhuis, T. S., \u0026 Van der Molen, W. H. (1986).** The Thornthwaite-Mather procedure as a simple engineering method to predict recharge. *Journal of Hydrology*, 84(3-4), 221-229.\n", "\n", - "**Thornthwaite, C. W., & Mather, J. R. (1957).** Instructions and tables for computing potential evapotranspiration and the water balance. *Publ. Climatol.*, 10(3).\n", + "**Thornthwaite, C. W., \u0026 Mather, J. R. (1957).** Instructions and tables for computing potential evapotranspiration and the water balance. *Publ. Climatol.*, 10(3).\n", "\n", - "**Yang, Y., Donohue, R. J., & McVicar, T. R. (2016).** Global estimation of effective plant rooting depth: Implications for hydrological modeling. *Water Resources Research*, 52(10), 8260-8276.\n" + "**Yang, Y., Donohue, R. J., \u0026 McVicar, T. R. (2016).** Global estimation of effective plant rooting depth: Implications for hydrological modeling. *Water Resources Research*, 52(10), 8260-8276.\n" ] }, { diff --git a/tutorials/histogram-matching/index.ipynb b/tutorials/histogram-matching/index.ipynb index 57c25391d..d66b3184f 100644 --- a/tutorials/histogram-matching/index.ipynb +++ b/tutorials/histogram-matching/index.ipynb @@ -81,7 +81,7 @@ "source": [ "import ee\n", "ee.Authenticate()\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -110,33 +110,33 @@ "id": "HK_4KZ3ZdZ5M" }, "source": [ - "def lookup(source_hist, target_hist):\r\n", - " \"\"\"Creates a lookup table to make a source histogram match a target histogram.\r\n", - "\r\n", - " Args:\r\n", - " source_hist: The histogram to modify. Expects the Nx2 array format produced by ee.Reducer.autoHistogram.\r\n", - " target_hist: The histogram to match to. Expects the Nx2 array format produced by ee.Reducer.autoHistogram.\r\n", - "\r\n", - " Returns:\r\n", - " A dictionary with 'x' and 'y' properties that respectively represent the x and y\r\n", - " array inputs to the ee.Image.interpolate function.\r\n", - " \"\"\"\r\n", - "\r\n", - " # Split the histograms by column and normalize the counts.\r\n", - " source_values = source_hist.slice(1, 0, 1).project([0])\r\n", - " source_counts = source_hist.slice(1, 1, 2).project([0])\r\n", - " source_counts = source_counts.divide(source_counts.get([-1]))\r\n", - "\r\n", - " target_values = target_hist.slice(1, 0, 1).project([0])\r\n", - " target_counts = target_hist.slice(1, 1, 2).project([0])\r\n", - " target_counts = target_counts.divide(target_counts.get([-1]))\r\n", - "\r\n", - " # Find first position in target where targetCount >= srcCount[i], for each i.\r\n", - " def make_lookup(n):\r\n", - " return target_values.get(target_counts.gte(n).argmax())\r\n", - "\r\n", - " lookup = source_counts.toList().map(make_lookup)\r\n", - "\r\n", + "def lookup(source_hist, target_hist):\n", + " \"\"\"Creates a lookup table to make a source histogram match a target histogram.\n", + "\n", + " Args:\n", + " source_hist: The histogram to modify. Expects the Nx2 array format produced by ee.Reducer.autoHistogram.\n", + " target_hist: The histogram to match to. 
Expects the Nx2 array format produced by ee.Reducer.autoHistogram.\n", + "\n", + " Returns:\n", + " A dictionary with 'x' and 'y' properties that respectively represent the x and y\n", + " array inputs to the ee.Image.interpolate function.\n", + " \"\"\"\n", + "\n", + " # Split the histograms by column and normalize the counts.\n", + " source_values = source_hist.slice(1, 0, 1).project([0])\n", + " source_counts = source_hist.slice(1, 1, 2).project([0])\n", + " source_counts = source_counts.divide(source_counts.get([-1]))\n", + "\n", + " target_values = target_hist.slice(1, 0, 1).project([0])\n", + " target_counts = target_hist.slice(1, 1, 2).project([0])\n", + " target_counts = target_counts.divide(target_counts.get([-1]))\n", + "\n", + " # Find first position in target where targetCount \u003e= srcCount[i], for each i.\n", + " def make_lookup(n):\n", + " return target_values.get(target_counts.gte(n).argmax())\n", + "\n", + " lookup = source_counts.toList().map(make_lookup)\n", + "\n", " return {'x': source_values.toList(), 'y': lookup}" ], "execution_count": null, @@ -306,7 +306,7 @@ "\r\n", " qa = image.select('pixel_qa')\r\n", " return (image.updateMask(\r\n", - " qa.bitwiseAnd(1 << 3).eq(0).And(qa.bitwiseAnd(1 << 5).eq(0)))\r\n", + " qa.bitwiseAnd(1 \u003c\u003c 3).eq(0).And(qa.bitwiseAnd(1 \u003c\u003c 5).eq(0)))\r\n", " .divide(10000)\r\n", " .select(['B4', 'B3', 'B2'], ['R', 'G', 'B'])\r\n", " .copyProperties(image, ['system:time_start']))\r\n", @@ -363,7 +363,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\r\n", " folium.raster_layers.TileLayer(\r\n", " tiles=map_id_dict['tile_fetcher'].url_format,\r\n", - " attr='Map Data © Google Earth Engine',\r\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\r\n", " name=name,\r\n", " overlay=True,\r\n", " control=True\r\n", @@ -422,4 +422,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/intro-to-python-api/index.ipynb b/tutorials/intro-to-python-api/index.ipynb index ffddf8bb7..47c835678 100644 --- a/tutorials/intro-to-python-api/index.ipynb +++ b/tutorials/intro-to-python-api/index.ipynb @@ -101,7 +101,7 @@ "ee.Authenticate()\n", "\n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -522,7 +522,7 @@ "id": "HH5517P-R5E-" }, "source": [ - "Then, we use the `getThumbUrl()` method to get a URL and we can use the IPython library to display the mean daytime LST map for the region of interest. Blue represents the coldest areas (< 10°C) and red represents the warmest areas (> 30°C) (note that it may take a moment for the image to load after the cell completes execution)." + "Then, we use the `getThumbUrl()` method to get a URL and we can use the IPython library to display the mean daytime LST map for the region of interest. Blue represents the coldest areas (\u003c 10°C) and red represents the warmest areas (\u003e 30°C) (note that it may take a moment for the image to load after the cell completes execution)." 
] }, { @@ -778,7 +778,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", " folium.raster_layers.TileLayer(\n", " tiles=map_id_dict['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " name=name,\n", " overlay=True,\n", " control=True\n", @@ -920,4 +920,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/sentinel-2-s2cloudless/index.ipynb b/tutorials/sentinel-2-s2cloudless/index.ipynb index 5f4bbfc1a..6244fcc54 100644 --- a/tutorials/sentinel-2-s2cloudless/index.ipynb +++ b/tutorials/sentinel-2-s2cloudless/index.ipynb @@ -81,7 +81,7 @@ "ee.Authenticate()\n", "\n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -383,7 +383,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", " folium.raster_layers.TileLayer(\n", " tiles=map_id_dict['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine\u003c/a\u003e',\n", " name=name,\n", " show=show,\n", " opacity=opacity,\n", @@ -657,4 +657,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/time-series-visualization-with-altair/index.ipynb b/tutorials/time-series-visualization-with-altair/index.ipynb index 769286886..5114159ce 100644 --- a/tutorials/time-series-visualization-with-altair/index.ipynb +++ b/tutorials/time-series-visualization-with-altair/index.ipynb @@ -126,7 +126,7 @@ "1. Filter the dataset (server-side Earth Engine)\n", "2. Reduce the data region by a statistic (server-side Earth Engine)\n", "3. Format the region reduction into a table (server-side Earth Engine)\n", - "4. Convert the Earth Engine table to a DataFrame (server-side Earth Engine > client-side Python kernel)\n", + "4. Convert the Earth Engine table to a DataFrame (server-side Earth Engine \u003e client-side Python kernel)\n", "5. Alter the DataFrame (client-side pandas)\n", "6. 
Plot the DataFrame (client-side Altair)\n", "\n", @@ -163,7 +163,7 @@ "source": [ "import ee\n", "ee.Authenticate()\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": null, "outputs": [] @@ -1056,8 +1056,8 @@ "source": [ "ndvi_doy_range = [224, 272]\n", "\n", - "ndvi_df_sub = ndvi_df[(ndvi_df['DOY'] >= ndvi_doy_range[0])\n", - " & (ndvi_df['DOY'] <= ndvi_doy_range[1])]\n", + "ndvi_df_sub = ndvi_df[(ndvi_df['DOY'] \u003e= ndvi_doy_range[0])\n", + " \u0026 (ndvi_df['DOY'] \u003c= ndvi_doy_range[1])]\n", "\n", "ndvi_df_sub = ndvi_df_sub.groupby('Year').agg('min')" ], @@ -1091,8 +1091,8 @@ "source": [ "pdsi_doy_range = [1, 272]\n", "\n", - "pdsi_df_sub = pdsi_df[(pdsi_df['DOY'] >= pdsi_doy_range[0])\n", - " & (pdsi_df['DOY'] <= pdsi_doy_range[1])]\n", + "pdsi_df_sub = pdsi_df[(pdsi_df['DOY'] \u003e= pdsi_doy_range[0])\n", + " \u0026 (pdsi_df['DOY'] \u003c= pdsi_doy_range[1])]\n", "\n", "pdsi_df_sub = pdsi_df_sub.groupby('Year').agg('mean')" ], @@ -1247,7 +1247,7 @@ " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", " folium.raster_layers.TileLayer(\n", " tiles=map_id_dict['tile_fetcher'].url_format,\n", - " attr='Map Data © Google Earth Engine, USDA National Agriculture Imagery Program',\n", + " attr='Map Data \u0026copy; \u003ca href=\"https://earthengine.google.com/\"\u003eGoogle Earth Engine, USDA National Agriculture Imagery Program\u003c/a\u003e',\n", " name=name,\n", " overlay=True,\n", " control=True).add_to(self)\n", @@ -1344,7 +1344,7 @@ " - [ETM+](https://www.usgs.gov/land-resources/nli/landsat/landsat-7?qt-science_support_page_related_con=0#qt-science_support_page_related_con) (Landsat's Enhanced Thematic Mapper Plus sensor)\n", " - [TM](https://www.usgs.gov/land-resources/nli/landsat/landsat-5?qt-science_support_page_related_con=0#qt-science_support_page_related_con) (Landsat's Thematic Mapper sensor)\n", " - [CFMask](https://www.usgs.gov/land-resources/nli/landsat/cfmask-algorithm) (Landsat USGS surface reflectance mask based on the CFMask algorithm)\n", - " - [NBR](https://www.usgs.gov/land-resources/nli/landsat/landsat-normalized-burn-ratio#:~:text=NBR%20is%20used%20to%20identify,SWIR%20values%20in%20traditional%20fashion.&text=In%20Landsat%204%2D7%2C%20NBR,Band%205%20%2B%20Band%207). (Normalized Burn Ratio: a spectral vegetation index)\n", + " - [NBR](https://www.usgs.gov/land-resources/nli/landsat/landsat-normalized-burn-ratio#:~:text=NBR%20is%20used%20to%20identify,SWIR%20values%20in%20traditional%20fashion.\u0026text=In%20Landsat%204%2D7%2C%20NBR,Band%205%20%2B%20Band%207). (Normalized Burn Ratio: a spectral vegetation index)\n", " - Understanding [Earth Engine joins](https://developers.google.com/earth-engine/joins_intro)" ] }, @@ -1371,8 +1371,8 @@ "\n", "# Define a function to mask out clouds and cloud shadows.\n", "def cfmask(img):\n", - " cloud_shadow_bi_mask = 1 << 3\n", - " cloud_bit_mask = 1 << 5\n", + " cloud_shadow_bi_mask = 1 \u003c\u003c 3\n", + " cloud_bit_mask = 1 \u003c\u003c 5\n", " qa = img.select('pixel_qa')\n", " mask = qa.bitwiseAnd(cloud_shadow_bi_mask).eq(0).And(\n", " qa.bitwiseAnd(cloud_bit_mask).eq(0))\n", @@ -1495,7 +1495,7 @@ "id": "uCkzwsWxdgfN" }, "source": [ - "4. Transfer data from the server to the client.
\n", + "4. Transfer data from the server to the client.\u003cbr\u003e\n", "_Note: if the process times out, you'll need to export/import the `nbr_stat_fc` feature collection as described in the **Optional export** section_.\n", "5. Convert the Python dictionary to a pandas DataFrame.\n", "6. Preview the DataFrame and check data types." @@ -1759,7 +1759,7 @@ "\n", "1. Import the collection and filter by date.\n", "2. Reduce the collection images by region and filter null computed values.\n", - "3. Convert the feature collection to a dictionary and transfer it client-side.
\n", + "3. Convert the feature collection to a dictionary and transfer it client-side.\u003cbr\u003e\n", "_Note: if the process times out, you'll need to export/import the `prism_stat_fc` feature collection as described in the **Optional export** section_.\n", "4. Convert the dictionary to a DataFrame.\n", "5. Preview the DataFrame." @@ -1926,4 +1926,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/tutorial-template.ipynb b/tutorials/tutorial-template.ipynb index e0dab3541..92eab6df3 100644 --- a/tutorials/tutorial-template.ipynb +++ b/tutorials/tutorial-template.ipynb @@ -81,7 +81,7 @@ "ee.Authenticate()\n", "\n", "# Initialize the library.\n", - "ee.Initialize()" + "ee.Initialize(project='my-project')" ], "execution_count": 0, "outputs": [] @@ -110,9 +110,9 @@ "\n", "1. Once your proposal is approved, [fork the Earth Engine Community repository](https://github.com/google/earthengine-community/fork) to your personal account.\n", "\n", - "1. In Colab, click \"File > Save a copy in GitHub\", granting Colab permission to write to your personal repo as necessary.\n", + "1. In Colab, click \"File \u003e Save a copy in GitHub\", granting Colab permission to write to your personal repo as necessary.\n", "\n", - " > Note: Pull requests are linked to the branch from which they were created. If you plan to have more than one tutorial out for review at a time, you will need to create a separate branch for each pull request ([instructions](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository)).\n", + " \u003e Note: Pull requests are linked to the branch from which they were created. If you plan to have more than one tutorial out for review at a time, you will need to create a separate branch for each pull request ([instructions](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository)).\n", "\n", "1. In the \"Copy to GitHub\" dialog that appears, select the `master` branch of the fork created above.\n", "\n", @@ -146,4 +146,4 @@ ] } ] -} \ No newline at end of file +}