Skip to content

Commit

Permalink
Merge pull request #155 from wri/develop
Browse files Browse the repository at this point in the history
Develop to master
  • Loading branch information
jterry64 authored May 4, 2022
2 parents 05e757a + b4dbd4c commit 8c988f7
Show file tree
Hide file tree
Showing 43 changed files with 487 additions and 426 deletions.
52 changes: 26 additions & 26 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ assembly / assemblyMergeStrategy := {
import sbtlighter._
import com.amazonaws.services.elasticmapreduce.model.Tag

// Always check Spot prices for instance type and select subnet based on bested price
// Always check Spot prices for instance type and select subnet based on best price
// GFW subnet zone us-east-1a: subnet-00335589f5f424283
// GFW subnet zone us-east-1b: subnet-8c2b5ea1
// GFW subnet zone us-east-1c: subnet-08458452c1d05713b
Expand All @@ -180,22 +180,22 @@ import com.amazonaws.services.elasticmapreduce.model.Tag
sparkEmrRelease := "emr-6.1.0"
sparkAwsRegion := "us-east-1"
sparkEmrApplications := Seq("Spark", "Zeppelin", "Ganglia")
sparkS3JarFolder := "s3://gfw-pipelines-dev/geotrellis/jars"
sparkS3LogUri := Some("s3://gfw-pipelines-dev/geotrellis/logs")
sparkSubnetId := Some("subnet-067b91868bc3a8ff1")
sparkSecurityGroupIds := Seq("sg-07c11c0c9189c0a7a", "sg-068c422162d468700")
sparkInstanceCount := 21 // 201 for carbonflux and carbon_sensitivity
sparkS3JarFolder := "s3://wri-users/dgibbs/geotrellis/jars"
sparkS3LogUri := Some("s3://wri-users/dgibbs/geotrellis/logs")
sparkSubnetId := Some("subnet-8c2b5ea1")
sparkSecurityGroupIds := Seq("sg-00ca15563a40c5687", "sg-6c6a5911")
sparkInstanceCount := 201 // 201 for carbonflux and carbon_sensitivity
sparkMasterType := "r4.2xlarge"
sparkCoreType := "r4.2xlarge"
sparkMasterEbsSize := Some(10)
sparkCoreEbsSize := Some(10)
//sparkMasterPrice := Some(3.0320)
sparkCorePrice := Some(0.532)
sparkClusterName := s"geotrellis-forest-change-diagnostic"
sparkClusterName := s"geotrellis-treecoverloss"
sparkEmrServiceRole := "EMR_DefaultRole"
sparkInstanceRole := "EMR_EC2_DefaultRole"
sparkJobFlowInstancesConfig := sparkJobFlowInstancesConfig.value.withEc2KeyName(
"tmaschler_gfw"
"dgibbs_wri"
)
sparkEmrBootstrap := List(
BootstrapAction(
Expand All @@ -206,12 +206,12 @@ sparkEmrBootstrap := List(
)
sparkRunJobFlowRequest := sparkRunJobFlowRequest.value
.withTags(new Tag("Project", "Global Forest Watch"))
.withTags(new Tag("Job", "Tree Cover Loss Analysis Geotrellis"))
.withTags(new Tag("Project Lead", "Thomas Maschler"))
.withTags(new Tag("Job", "Carbon Flux Analysis Geotrellis"))
.withTags(new Tag("Project Lead", "David Gibbs"))
.withTags(new Tag("Name", "geotrellis-treecoverloss"))
sparkEmrConfigs := List(
// reference to example by geotrellis: https://github.com/geotrellis/geotrellis-spark-job.g8/blob/master/src/main/g8/build.sbt#L70-L91
EmrConfig("spark").withProperties("maximizeResourceAllocation" -> "true"),
// EmrConfig("spark").withProperties("maximizeResourceAllocation" -> "true"),
EmrConfig("emrfs-site").withProperties("fs.s3.useRequesterPaysHeader" -> "true"),
EmrConfig("spark-defaults").withProperties(
// https://aws.amazon.com/blogs/big-data/best-practices-for-successfully-managing-memory-for-apache-spark-applications-on-amazon-emr/
Expand All @@ -235,15 +235,15 @@ sparkEmrConfigs := List(
// spark.default.parallelism = spark.executor.instances * spark.executors.cores * 2
// spark.sql.shuffle.partitions = spark.default.parallelism

// "spark.dynamicAllocation.enabled" -> "false",
// "spark.executor.cores" -> "1", //5",
// "spark.executor.memory" -> "5652m", //37G
// "spark.executor.memoryOverhead" -> "2g", //5G
// "spark.driver.cores" -> "1",
// "spark.driver.memory" -> "6652m",
// "spark.executor.instances" -> "159", // 1599 for carbonflux and carbon_sensitivity
// "spark.default.parallelism" -> "1590", // 15990 for carbonflux and carbon_sensitivity
// "spark.sql.shuffle.partitions" -> "1590", // 15990 for carbonflux and carbon_sensitivity
"spark.dynamicAllocation.enabled" -> "false",
"spark.executor.cores" -> "1", //5",
"spark.executor.memory" -> "5652m", //37G
"spark.executor.memoryOverhead" -> "2g", //5G
"spark.driver.cores" -> "1",
"spark.driver.memory" -> "6652m",
"spark.executor.instances" -> "1599", // 1599 for carbonflux and carbon_sensitivity
"spark.default.parallelism" -> "15990", // 15990 for carbonflux and carbon_sensitivity
"spark.sql.shuffle.partitions" -> "15990", // 15990 for carbonflux and carbon_sensitivity

"spark.shuffle.spill.compress" -> "true",
"spark.driver.maxResultSize" -> "3G",
Expand All @@ -254,20 +254,20 @@ sparkEmrConfigs := List(
"spark.executorEnv.LD_LIBRARY_PATH" -> "/usr/local/miniconda/lib/:/usr/local/lib",
"spark.dynamicAllocation.enabled" -> "true",

// Use these GC strategy as default
"spark.driver.defaultJavaOptions" -> "-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p'",
"spark.executor.defaultJavaOptions" -> "-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p'",
// // Use these GC strategy as default
// "spark.driver.defaultJavaOptions" -> "-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p'",
// "spark.executor.defaultJavaOptions" -> "-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p'",

// "spark.kryoserializer.buffer.max" -> "2047m",

// Best practice 4: Always set up a garbage collector when handling large volumes of data through Spark.
// Use this GC strategy to avoid java.lang.OutOfMemoryError: GC overhead limit exceeded
// "spark.executor.defaultJavaOptions" -> "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'",
// "spark.driver.defaultJavaOptions" -> "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'",
"spark.executor.defaultJavaOptions" -> "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'",
"spark.driver.defaultJavaOptions" -> "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'",

// set this environment variable for GDAL to use request payer method for S3 files
"spark.yarn.appMasterEnv.AWS_REQUEST_PAYER" -> "requester",
"spark.yarn.executorEnv.AWS_REQUEST_PAYER" -> "requester",
"spark.executorEnv.AWS_REQUEST_PAYER" -> "requester",

),
// EmrConfig("spark-env").withProperties(
Expand Down
11 changes: 6 additions & 5 deletions src/main/resources/raster-catalog-default.json
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,7 @@
"name":"wur_radd_alerts",
"source_uri":"s3://gfw-data-lake/wur_radd_alerts/latest/raster/epsg-4326/{grid_size}/{row_count}/date_conf/geotiff/{tile_id}.tif"
},
{
"name":"gfw_plantations",
"source_uri":"s3://gfw-data-lake/gfw_plantations/v2014/raster/epsg-4326/{grid_size}/{row_count}/type/geotiff/{tile_id}.tif"
},

{
"name":"gfw_wood_fiber",
"source_uri":"s3://gfw-data-lake/gfw_wood_fiber/v202106/raster/epsg-4326/{grid_size}/{row_count}/is/geotiff/{tile_id}.tif"
Expand Down Expand Up @@ -329,7 +326,11 @@
"source_uri": "s3://gfw-data-lake/gfw_full_extent_non_co2_gross_emissions/v20220316/raster/epsg-4326/{grid_size}/{row_count}/Mg_CO2e_ha-1/geotiff/{tile_id}.tif"
},
{
"name":"fao_ecozones",
"name":"fao_ecozones_2000",
"source_uri": "s3://gfw-data-lake/fao_ecozones/v2000/raster/epsg-4326/{grid_size}/{row_count}/class/geotiff/{tile_id}.tif"
},
{
"name":"fao_ecozones_2010",
"source_uri": "s3://gfw-data-lake/fao_ecozones/v2010/raster/epsg-4326/{grid_size}/{row_count}/class/geotiff/{tile_id}.tif"
},
{
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/org/globalforestwatch/layers/Agc2000.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ case class Agc2000(gridTile: GridTile, model: String = "standard", kwargs: Map[S


val uri: String =
s"$basePath/gfw_aboveground_carbon_stock_2000/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg/gdal-geotiff/${gridTile.tileId}.tif"
// s"$basePath/gfw_aboveground_carbon_stock_2000/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg/gdal-geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_0/agc_2000/standard/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ case class AgcEmisYear(gridTile: GridTile, model: String = "standard", kwargs: M

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_aboveground_carbon_stock_in_emissions_year$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg/gdal-geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/agc_emis_year/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/agc_emis_year/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ case class BgcEmisYear(gridTile: GridTile, model: String = "standard", kwargs: M

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_belowground_carbon_stock_in_emissions_year$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg_ha-1/geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/bgc_emis_year/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/bgc_emis_year/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ case class BurnYearHansenLoss(gridTile: GridTile, kwargs: Map[String, Any])


val uri: String =
// s"$basePath/gfw_burn_year_Hansen_loss/v20190816/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/year/geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/burn_year_with_Hansen_loss/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/burn_year_with_Hansen_loss/${gridTile.tileId}.tif"

override def lookup(value: Int): Integer =
if (value == 0) null else value + 2000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ case class DeadwoodCarbonEmisYear(gridTile: GridTile, model: String = "standard"

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_deadwood_carbon_stock_in_emissions_year$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg/gdal-geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/deadwood_emis_year/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/deadwood_emis_year/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ package org.globalforestwatch.layers

import org.globalforestwatch.grids.GridTile

case class FaoEcozones(gridTile: GridTile, model: String = "standard", kwargs: Map[String, Any])
case class FaoEcozones2000(gridTile: GridTile, model: String = "standard", kwargs: Map[String, Any])
extends StringLayer
with OptionalILayer {

val datasetName = "fao_ecozones"
val datasetName = "fao_ecozones_2000"
val uri: String = uriForGrid(gridTile)

def lookup(value: Int): String = value match {
Expand Down
36 changes: 36 additions & 0 deletions src/main/scala/org/globalforestwatch/layers/FaoEcozones2010.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.globalforestwatch.layers

import org.globalforestwatch.grids.GridTile

/** Layer for the FAO Global Ecological Zones dataset, 2010 edition.
  *
  * Resolves its source raster via `uriForGrid` using the
  * "fao_ecozones_2010" dataset name and decodes integer pixel values
  * into human-readable ecozone class names.
  *
  * @param gridTile tile of the analysis grid to read
  * @param model    flux-model variant, defaults to "standard"
  *                 (not referenced in this class — TODO confirm it is needed)
  * @param kwargs   additional layer options forwarded to the base layer
  */
case class FaoEcozones2010(gridTile: GridTile, model: String = "standard", kwargs: Map[String, Any])
extends StringLayer
with OptionalILayer {

// Dataset name used by uriForGrid to locate the source raster.
val datasetName = "fao_ecozones_2010"
val uri: String = uriForGrid(gridTile)

// Decode a raster class code into its FAO ecozone name.
// NOTE(review): match is not exhaustive — a pixel value outside 1-22 throws
// scala.MatchError; presumably NoData pixels never reach lookup — confirm.
def lookup(value: Int): String = value match {
case 1 => "Boreal coniferous forest"
case 2 => "Boreal mountain system"
case 3 => "Boreal tundra woodland"
case 4 => "No data"
case 5 => "Polar"
case 6 => "Subtropical desert"
case 7 => "Subtropical dry forest"
case 8 => "Subtropical humid forest"
case 9 => "Subtropical mountain system"
case 10 => "Subtropical steppe"
case 11 => "Temperate continental forest"
case 12 => "Temperate desert"
case 13 => "Temperate mountain system"
case 14 => "Temperate oceanic forest"
case 15 => "Temperate steppe"
case 16 => "Tropical desert"
case 17 => "Tropical dry forest"
case 18 => "Tropical moist deciduous forest"
case 19 => "Tropical mountain system"
case 20 => "Tropical rainforest"
case 21 => "Tropical shrubland"
case 22 => "Water"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ case class FluxModelExtent(gridTile: GridTile, model: String = "standard", kwarg
// val model_suffix = if (model == "standard") "" else s"__$model"
val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_flux_model_extent$model_suffix/v20191031/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/is/geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/model_extent/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/model_extent/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@ case class GrossAnnualAbovegroundRemovalsCarbon(gridTile: GridTile,

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_gross_annual_aboveground_removals_carbon$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg_ha-1/geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/annual_removal_factor_AGC_all_forest_types/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/annual_removal_factor_AGC_all_forest_types/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ case class GrossAnnualBelowgroundRemovalsCarbon(gridTile: GridTile, model: Strin

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_gross_annual_belowground_removals_carbon$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg_ha-1/geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/annual_removal_factor_BGC_all_forest_types/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/annual_removal_factor_BGC_all_forest_types/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.globalforestwatch.layers

import org.globalforestwatch.grids.GridTile

/** Gross CO2-only emissions (CO2e, biomass + soil) from the GFW carbon
  * flux model, read per tile from the flux_1_2_2 outputs on S3.
  *
  * @param gridTile tile of the analysis grid to read
  * @param model    flux-model variant; selects the S3 folder, "standard" by default
  * @param kwargs   additional layer options forwarded to the base layer
  */
case class GrossEmissionsCo2OnlyCo2eBiomassSoil(gridTile: GridTile,
                                                model: String = "standard",
                                                kwargs: Map[String, Any])
    extends FloatLayer
    with OptionalFLayer {

  // No data-lake dataset is registered for this layer.
  val datasetName = "Na"

  // "standard" keeps its own folder name; other model variants are used verbatim.
  val model_suffix: String = if (model == "standard") "standard" else model

  val uri: String =
    s"s3://gfw-files/flux_1_2_2/gross_emissions_co2_only_co2e/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.globalforestwatch.layers

import org.globalforestwatch.grids.GridTile

/** Gross CO2-only emissions (CO2e, soil-only variant) from the GFW carbon
  * flux model, read per tile from the flux_1_2_2 outputs on S3.
  *
  * @param gridTile tile of the analysis grid to read
  * @param kwargs   additional layer options forwarded to the base layer
  */
case class GrossEmissionsCo2OnlyCo2eSoilOnly(gridTile: GridTile, kwargs: Map[String, Any])
    extends FloatLayer
    with OptionalFLayer {

  // No data-lake dataset is registered for this layer.
  val datasetName = "Na"

  val uri: String =
    s"s3://gfw-files/flux_1_2_2/gross_emissions_co2_only_co2e/soil_only/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ case class GrossEmissionsNodeCodes(gridTile: GridTile,

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_emissions_node_codes$model_suffix/v20200824/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/name/geotiff/${gridTile.tileId}.tif"
// s"s3://gfw-files/flux_1_2_1/gross_emissions_node_codes/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_0/gross_emissions_node_codes/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/gross_emissions_node_codes/$model_suffix/${gridTile.tileId}.tif"

override val externalNoDataValue = "Not applicable"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.globalforestwatch.layers

import org.globalforestwatch.grids.GridTile

/** Gross non-CO2 emissions (CO2e, biomass + soil) from the GFW carbon
  * flux model, read per tile from the flux_1_2_2 outputs on S3.
  *
  * @param gridTile tile of the analysis grid to read
  * @param model    flux-model variant; selects the S3 folder, "standard" by default
  * @param kwargs   additional layer options forwarded to the base layer
  */
case class GrossEmissionsNonCo2Co2eBiomassSoil(gridTile: GridTile,
                                               model: String = "standard",
                                               kwargs: Map[String, Any])
    extends FloatLayer
    with OptionalFLayer {

  // No data-lake dataset is registered for this layer.
  val datasetName = "Na"

  // "standard" keeps its own folder name; other model variants are used verbatim.
  val model_suffix: String = if (model == "standard") "standard" else model

  val uri: String =
    s"s3://gfw-files/flux_1_2_2/gross_emissions_non_co2_co2e/$model_suffix/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.globalforestwatch.layers

import org.globalforestwatch.grids.GridTile

/** Gross non-CO2 emissions (CO2e, soil-only variant) from the GFW carbon
  * flux model, read per tile from the flux_1_2_2 outputs on S3.
  *
  * @param gridTile tile of the analysis grid to read
  * @param kwargs   additional layer options forwarded to the base layer
  */
case class GrossEmissionsNonCo2Co2eSoilOnly(gridTile: GridTile, kwargs: Map[String, Any])
    extends FloatLayer
    with OptionalFLayer {

  // No data-lake dataset is registered for this layer.
  val datasetName = "Na"

  val uri: String =
    s"s3://gfw-files/flux_1_2_2/gross_emissions_non_co2_co2e/soil_only/${gridTile.tileId}.tif"
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@ case class LitterCarbonEmisYear(gridTile: GridTile, model: String = "standard",

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_litter_carbon_stock_in_emissions_year$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg/gdal-geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/litter_emis_year/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/litter_emis_year/$model_suffix/${gridTile.tileId}.tif"
}
37 changes: 0 additions & 37 deletions src/main/scala/org/globalforestwatch/layers/Plantations.scala

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@ case class RemovalForestType(gridTile: GridTile, model: String = "standard", kwa
val model_suffix: String = if (model == "standard") "standard" else s"$model"

val uri: String =
// s"$basePath/gfw_removal_forest_type$model_suffix/v20150601/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/type/geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/removal_forest_type/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/removal_forest_type/$model_suffix/${gridTile.tileId}.tif"

override val externalNoDataValue = "Not applicable"

def lookup(value: Int): String = value match {
case 1 =>
"IPCC Table 4.9 default old (>20 year) secondary and primary rates"
case 1 => "IPCC Table 4.9 default old (>20 year) secondary and primary rates"
case 2 => "Young (<20 year) natural forest rates (Cook-Patton et al. 2020)"
case 3 => "US-specific rates (USFS FIA)"
case 4 => "Planted forest rates"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,5 @@ case class SoilCarbonEmisYear(gridTile: GridTile, model: String = "standard", kw

val model_suffix: String = if (model == "standard") "standard" else s"$model"
val uri: String =
// s"$basePath/gfw_soil_carbon_stock_in_emissions_year$model_suffix/v20191106/raster/epsg-4326/${gridTile.gridSize}/${gridTile.rowCount}/Mg/gdal-geotiff/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_1/soil_emis_year/$model_suffix/${gridTile.tileId}.tif"
s"s3://gfw-files/flux_1_2_2/soil_emis_year/$model_suffix/${gridTile.tileId}.tif"
}
Loading

0 comments on commit 8c988f7

Please sign in to comment.