Skip to content

Commit

Permalink
Disable dataproc enhanced optimizer configs
Browse files Browse the repository at this point in the history
Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>

Fixes NVIDIA#1550

For dataproc GPU clusters, the Autotuner should set the following 2
properties to false. In addition, the autotuner appends a comment to
warn the user that those properties might be problematic to the GPU run.

- "spark.dataproc.enhanced.optimizer.enabled": "false"
- "spark.dataproc.enhanced.execution.enabled": "false"

The autotuner output will add a message stating that:

"should be disabled. WARN: Turning this property on might cause the GPU
accelerated Dataproc cluster to hang."
  • Loading branch information
amahussein committed Feb 20, 2025
1 parent 78cab00 commit ca17528
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 82 deletions.
8 changes: 6 additions & 2 deletions core/src/main/resources/bootstrap/tuningTable.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,19 @@ tuningDefinitions:
level: job
category: tuning
- label: spark.dataproc.enhanced.execution.enabled
  description: 'Enables enhanced execution. Turning this on might cause the accelerated dataproc cluster to hang.'
  enabled: true
  level: job
  category: tuning
  comments:
    # "persistent" comments are appended to the AutoTuner output every time
    # this property is recommended.
    persistent: 'should be disabled. WARN: Turning this property on might cause the GPU accelerated Dataproc cluster to hang.'
- label: spark.dataproc.enhanced.optimizer.enabled
  description: 'Enables enhanced optimizer. Turning this on might cause the accelerated dataproc cluster to hang.'
  enabled: true
  level: job
  category: tuning
  comments:
    # "persistent" comments are appended to the AutoTuner output every time
    # this property is recommended.
    persistent: 'should be disabled. WARN: Turning this property on might cause the GPU accelerated Dataproc cluster to hang.'
- label: spark.executor.cores
description: 'The number of cores to use on each executor. It is recommended to be set to 16'
enabled: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -591,8 +591,10 @@ class DataprocPlatform(gpuDevice: Option[GpuDevice],
// Identifier used to select Dataproc-specific behavior elsewhere in the tool.
override val platformName: String = PlatformNames.DATAPROC
// Default GPU assumed for Dataproc clusters when none is supplied.
override val defaultGpuDevice: GpuDevice = T4Gpu
// Properties the AutoTuner always recommends on this platform.
// Both Dataproc "enhanced" features are forced OFF: per NVIDIA#1550 they can
// cause a GPU-accelerated Dataproc cluster to hang.
override val recommendationsToInclude: Seq[(String, String)] = Seq(
  // Keep disabled. This property does not work well with GPU clusters.
  "spark.dataproc.enhanced.optimizer.enabled" -> "false",
  // Keep disabled. This property does not work well with GPU clusters.
  "spark.dataproc.enhanced.execution.enabled" -> "false"
)

override def isPlatformCSP: Boolean = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,17 +330,47 @@ class AutoTuner(
}
}

/**
 * Appends the comment used when a property was absent from the application's
 * properties. The text comes from the tuning table's "missing" entry for the
 * key; when no such entry exists, a generic "'key' was not set." message is
 * used instead.
 * @param key the property set by the autotuner.
 */
private def appendMissingComment(key: String): Unit = {
  val fallback = s"'$key' was not set."
  val message = TuningEntryDefinition.TUNING_TABLE.get(key) match {
    case Some(entryDef) => entryDef.getMissingComment().getOrElse(fallback)
    case None => fallback
  }
  appendComment(message)
}

/**
 * Appends the "persistent" comment configured for the given property in the
 * tuning table, prefixed with the property name. No comment is added when the
 * key has no tuning-table entry or no persistent comment defined.
 * @param key the property set by the autotuner.
 */
private def appendPersistentComment(key: String): Unit = {
  for {
    entryDef <- TuningEntryDefinition.TUNING_TABLE.get(key)
    persistent <- entryDef.getPersistentComment()
  } appendComment(s"'$key' $persistent")
}

/**
 * Records a recommendation for the given property key.
 * Skipped keys are ignored entirely. Otherwise the entry is looked up (or
 * created, seeded with the value observed in the app's properties) and, for a
 * non-null value, the recommended value is set and the relevant tuning-table
 * comments ("missing" and "persistent") are appended to the AutoTuner output.
 * @param key the property to recommend.
 * @param value the recommended value; null means "no recommendation".
 */
def appendRecommendation(key: String, value: String): Unit = {
  if (!skippedRecommendations.contains(key)) {
    // Update the recommendation entry or create a new one seeded with the
    // value observed in the application's properties (if any).
    val recomRecord = recommendations.getOrElseUpdate(key,
      TuningEntry.build(key, getPropertyValue(key), None))
    // If the value is not null, then proceed to add the recommendation.
    Option(value).foreach { nonNullValue =>
      recomRecord.setRecommendedValue(nonNullValue)
      if (recomRecord.originalValue.isEmpty) {
        // The property was not set originally; add the "missing" comment
        // (tuning-table default, or a generic message) if any.
        appendMissingComment(key)
      }
      // Add the persistent comment if any.
      appendPersistentComment(key)
    }
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@

package com.nvidia.spark.rapids.tool.tuning

import java.util

import scala.beans.BeanProperty
import scala.collection.JavaConverters._
import scala.collection.breakOut

import org.yaml.snakeyaml.{DumperOptions, LoaderOptions, Yaml}
import org.yaml.snakeyaml.constructor.Constructor
import org.yaml.snakeyaml.representer.Representer
import scala.collection.JavaConverters._
import scala.collection.breakOut

import org.apache.spark.sql.rapids.tool.util.UTF8Source

Expand All @@ -40,6 +42,14 @@ import org.apache.spark.sql.rapids.tool.util.UTF8Source
* Default is true.
* @param defaultSpark The default value of the property in Spark. This is used to set the
* originalValue of the property in case it is not set by the eventlog.
* @param comments The defaults comments to be loaded for the entry. It is a map to represent
* three different types of comments:
* 1. "missing" to represent the default comment to be appended to the AutoTuner's
* comment when the property is missing.
* 2. "persistent" to represent a comment that always shows up in the AutoTuner's
* output.
* 3. "updated" to represent a comment that shows when a property is being set by
* the Autotuner.
*/
class TuningEntryDefinition(
@BeanProperty var label: String,
Expand All @@ -48,10 +58,12 @@ class TuningEntryDefinition(
@BeanProperty var level: String,
@BeanProperty var category: String,
@BeanProperty var bootstrapEntry: Boolean,
@BeanProperty var defaultSpark: String) {
@BeanProperty var defaultSpark: String,
@BeanProperty var comments: util.LinkedHashMap[String, String]) {
// No-arg constructor required by SnakeYAML bean loading; fields are populated
// afterwards via the @BeanProperty setters.
def this() = {
  this(label = "", description = "", enabled = true, level = "", category = "",
    bootstrapEntry = true, defaultSpark = null,
    comments = new util.LinkedHashMap[String, String]())
}

def isEnabled(): Boolean = {
Expand All @@ -69,6 +81,18 @@ class TuningEntryDefinition(
// True when a Spark default value was declared for this entry in the
// tuning table (used to seed originalValue when the eventlog lacks one).
def hasDefaultSpark(): Boolean = {
  Option(defaultSpark).isDefined
}

// Comment to append when the property is missing from the app's properties.
// The map returns null for absent keys, which maps to None.
def getMissingComment(): Option[String] = {
  comments.get("missing") match {
    case null => None
    case comment => Some(comment)
  }
}

// Comment that always shows up in the AutoTuner's output for this entry.
def getPersistentComment(): Option[String] = {
  val raw = comments.get("persistent")
  if (raw == null) None else Some(raw)
}

// Comment to show when the AutoTuner updates this property's value.
def getUpdatedComment(): Option[String] = {
  comments.get("updated") match {
    case null => None
    case comment => Some(comment)
  }
}
}

class TuningEntries(
Expand Down
Loading

0 comments on commit ca17528

Please sign in to comment.