Skip to content

Commit

Permalink
Disable Per-SQL summary text output (#1530)
Browse files Browse the repository at this point in the history
Signed-off-by: Ahmed Hussein (amahussein) <a@ahussein.me>

Fixes #1527

Disable the text format output generated per-sql. The target is to
reduce the noise of the stdout and improve the performance of the
core-tools

### Impact on the output:

- `rapids_4_spark_qualification_output_persql.log` is not generated
anymore by the qualTool
- remove column `AppName` from
`rapids_4_spark_qualification_output_persql.csv`
- the `rapids_4_spark_qualification_output_persql.log` can still be
generated by the RunningQualificationApp
- The order of the SQLs in the CSV file has changed. They are now sorted in descending order
based on (GPU opportunity, and DF duration) within each app. Previously, the
SQLs were sorted globally, which might cause considerable overhead for a
large number of eventlogs.

### Impact on Performance and usability:

- Improve readability of the stdout/log generated by the tools.
- Reduce the size of lines consumed by the python wrapper.
- Sorting SQLs per app implies lower memory requirements since it is only
required to maintain the list of SQLs for the current iteration.
- Improve the string construction by avoiding filling `Buffer<String,
Int>`
- Improve the performance by skipping generating and writing the log
file to the disk.
  • Loading branch information
amahussein authored Feb 10, 2025
1 parent c8fedd7 commit 38aad12
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,25 +191,65 @@ class QualOutputWriter(outputDir: String, reportReadSchema: Boolean,
}
}

/**
* Write the per SQL CSV report. This is used by the QualificationApp to write CSV report for the
* SQLs in the application.
* The SQLs within each app are sorted DESC by GPU opportunity and SQL DF Duration in
* descending order.
* Note that the caller takes the responsibility of sorting the App summaries; which determines
* the order of each AppID in the report.
*
* The implementation is optimized as follows:
* - reduces object allocations by avoiding the use of ListBuffer[(String, Int)].
* - uses raw interpolation, which is a faster way of concatenating strings.
* - does not sort all the SQLs of the applications. Instead, it sorts the SQLs locally per app,
* which reduces the size of the data being sorted.
*
* @param sums list of QualificationSummaryInfo
* @param maxSQLDescLength the maximum length allowed in the SQL description field.
*/
def writePerSqlCSVReport(sums: Seq[QualificationSummaryInfo], maxSQLDescLength: Int): Unit = {
val delimiter = QualOutputWriter.CSV_DELIMITER
val emptyString = StringUtils.reformatCSVString("")

def constructRowFromPerSqlSummary(
    appID: String, sumInfo: EstimatedPerSQLSummaryInfo): String = {
  // Builds a single CSV row (without a trailing newline) for one SQL entry.
  // The fields are joined in this order: app ID, root SQL ID, SQL ID, SQL description,
  // SQL DF duration, GPU opportunity.
  // appID is concatenated as-is, so the caller is expected to pass it already CSV-formatted.

  // CSV-format the root execution ID when it is defined; otherwise fall back to the
  // pre-formatted empty string.
  val rootExecField = sumInfo.rootExecutionID
    .map(execId => StringUtils.reformatCSVString(execId.toString))
    .getOrElse(emptyString)
  // Run the description through formatSQLDescription (bounded by maxSQLDescLength) and then
  // CSV-format the result.
  val formattedDesc =
    QualOutputWriter.formatSQLDescription(sumInfo.sqlDesc, maxSQLDescLength, delimiter)
  val descField = StringUtils.reformatCSVString(formattedDesc)
  val perSqlInfo = sumInfo.info
  // The raw interpolator is used instead of the s-interpolator because it does not process
  // escaped characters, which makes it cheaper.
  raw"$appID$delimiter$rootExecField$delimiter${sumInfo.sqlID}$delimiter$descField$delimiter" +
    raw"${perSqlInfo.sqlDfDuration}$delimiter${perSqlInfo.gpuOpportunity}"
}

val csvFileWriter = new ToolTextFileWriter(outputDir,
s"${QualOutputWriter.LOGFILE_NAME}_persql.csv",
"Per SQL CSV Report", hadoopConf)
try {
val appNameSize = QualOutputWriter.getAppNameSize(sums)
val appIdSize = QualOutputWriter.getAppIdSize(sums)
val sqlDescSize =
QualOutputWriter.getSqlDescSize(sums, maxSQLDescLength, QualOutputWriter.CSV_DELIMITER)
val headersAndSizes =
QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize, appIdSize, sqlDescSize)
csvFileWriter.write(QualOutputWriter.constructDetailedHeader(headersAndSizes,
QualOutputWriter.CSV_DELIMITER, false))
val appIdMaxSize = QualOutputWriter.getAppIdSize(sums)
val sortedInfo = sortPerSqlInfo(sums)
sortedInfo.foreach { sumInfo =>
val row = QualOutputWriter.constructPerSqlSummaryInfo(sumInfo, headersAndSizes,
appIdMaxSize, ",", false, maxSQLDescLength)
csvFileWriter.write(row)
csvFileWriter.write(
QualOutputWriter.constructOutputRowFromMap(QualOutputWriter.getPerSqlHeaderStrings,
QualOutputWriter.CSV_DELIMITER))
// Write the perSQL info for each app.
sums.foreach { sum =>
sum.perSQLEstimatedInfo match {
case Some(perSqlArr) =>
if (perSqlArr.nonEmpty) {
val appIDStr = StringUtils.reformatCSVString(sum.appId)
perSqlArr.sortBy(sum => {
(-sum.info.gpuOpportunity, -sum.info.appDur)
}).foreach { sqlInfo =>
csvFileWriter.write(constructRowFromPerSqlSummary(appIDStr, sqlInfo))
// add new line separately to avoid processing escape characters.
csvFileWriter.write("\n")
}
}
case _ => // Do nothing
}
}
} finally {
csvFileWriter.close()
Expand All @@ -228,15 +268,15 @@ class QualOutputWriter(outputDir: String, reportReadSchema: Boolean,
sortedAsc.reverse
}
}

private def writePerSqlTextSummary(writer: ToolTextFileWriter,
sums: Seq[QualificationSummaryInfo],
numOutputRows: Int, maxSQLDescLength: Int): Unit = {
val appNameSize = QualOutputWriter.getAppNameSize(sums)
val appIdSize = QualOutputWriter.getAppIdSize(sums)
val sqlDescSize =
QualOutputWriter.getSqlDescSize(sums, maxSQLDescLength, QualOutputWriter.TEXT_DELIMITER)
val headersAndSizes =
QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize, appIdSize, sqlDescSize)
QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appIdSize, sqlDescSize)
val entireHeader = QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
TEXT_DELIMITER, true)
val sep = "=" * (entireHeader.size - 1)
Expand Down Expand Up @@ -783,12 +823,13 @@ object QualOutputWriter {
QualOutputWriter.constructOutputRowFromMap(headersAndSizes, delimiter, prettyPrint)
}

/**
* Constructs the header strings and their column sizes for the detailed PerSQL summary.
* This is called by the RunningQualOutputWriter and the RunningQualificationApp.
*/
def getDetailedPerSqlHeaderStringsAndSizes(
appMaxNameSize: Int,
appMaxIdSize: Int,
sqlDescLength: Int): LinkedHashMap[String, Int] = {
val detailedHeadersAndFields = LinkedHashMap[String, Int](
APP_NAME_STR -> appMaxNameSize,
APP_ID_STR -> appMaxIdSize,
ROOT_SQL_ID_STR -> ROOT_SQL_ID_STR.size,
SQL_ID_STR -> SQL_ID_STR.size,
Expand All @@ -807,6 +848,10 @@ object QualOutputWriter {
replaceDelimiter(escapedMetaStr, delimiter)
}

/**
* Constructs a row from the PerSQL Summary Info.
* This method is used by the RunningQualificationApp
*/
def constructPerSqlSummaryInfo(
sumInfo: EstimatedPerSQLSummaryInfo,
headersAndSizes: LinkedHashMap[String, Int],
Expand All @@ -818,7 +863,6 @@ object QualOutputWriter {
val reformatCSVFunc : String => String =
if (reformatCSV) str => StringUtils.reformatCSVString(str) else str => str
val data = ListBuffer[(String, Int)](
reformatCSVFunc(sumInfo.info.appName) -> headersAndSizes(APP_NAME_STR),
reformatCSVFunc(sumInfo.info.appId) -> appIdMaxSize,
reformatCSVFunc(sumInfo.rootExecutionID.getOrElse("").toString)-> ROOT_SQL_ID_STR.size,
sumInfo.sqlID.toString -> SQL_ID_STR.size,
Expand Down Expand Up @@ -848,6 +892,22 @@ object QualOutputWriter {
detailedHeadersAndFields
}

/**
* Construct the headers for the PerSql Summary (csv file).
* rapids_4_spark_qualification_output_persql.csv
* @return LinkedHashMap[String, Int]
*/
private def getPerSqlHeaderStrings: LinkedHashMap[String, Int] = {
  // Headers whose size is simply the length of the header text itself.
  val selfSizedHeaders = Seq(APP_ID_STR, ROOT_SQL_ID_STR, SQL_ID_STR, SQL_DESC_STR)
    .map(header => header -> header.size)
  // Headers whose size comes from a dedicated constant.
  val constantSizedHeaders = Seq(
    SQL_DUR_STR -> SQL_DUR_STR_SIZE,
    GPU_OPPORTUNITY_STR -> GPU_OPPORTUNITY_STR_SIZE)
  // Entries are added in the order of the columns in the per-SQL CSV header.
  LinkedHashMap[String, Int](selfSizedHeaders ++ constantSizedHeaders: _*)
}

private def getClusterInfoHeaderStrings: mutable.LinkedHashMap[String, Int] = {
val headersAndFields = Seq(
APP_ID_STR, APP_NAME_STR, STATUS_REPORT_PATH_STR, VENDOR, DRIVER_HOST, CLUSTER_ID_STR,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration,
sortForExecutiveSummary(sortedDescDetailed, order), numRows)
qWriter.writeDetailedCSVReport(sortedDescDetailed)
if (reportSqlLevel) {
qWriter.writePerSqlTextReport(allAppsSum, numRows, maxSQLDescLength)
qWriter.writePerSqlCSVReport(allAppsSum, maxSQLDescLength)
}
qWriter.writeExecReport(allAppsSum)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ class RunningQualOutputWriter(

// we don't know max length since process per query, hardcode for 100 for now
private val SQL_DESC_LENGTH = 100
private val appNameSize = if (appName.nonEmpty) appName.size else 100
val headersAndSizes = QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize,
appId.size, SQL_DESC_LENGTH)
val headersAndSizes = QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appId.size,
SQL_DESC_LENGTH)
val entireTextHeader = QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
TEXT_DELIMITER, true)
private val sep = "=" * (entireTextHeader.size - 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,9 @@ class RunningQualificationApp(

// we don't know the max sql query name size so lets cap it at 100
private val SQL_DESC_LENGTH = 100
private lazy val appNameSize = {
val runningAppName = getAppName
if (runningAppName.nonEmpty) {
runningAppName.size
} else {
100
}
}

private lazy val perSqlHeadersAndSizes = {
QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize,
appId.size, SQL_DESC_LENGTH)
QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appId.size, SQL_DESC_LENGTH)
}

def this() = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
App Name,App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity
"TPC-DS Like Bench q86","app-20210319163812-1778","",0,"Register input tables",2,2
"TPC-DS Like Bench q86","app-20210319163812-1778","",21,"Register input tables",1,1
"TPC-DS Like Bench q86","app-20210319163812-1778","",5,"Register input tables",1,1
"TPC-DS Like Bench q86","app-20210319163812-1778","",6,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",15,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",3,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",12,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",18,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",9,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",19,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",1,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",10,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",16,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",7,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",22,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",13,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",4,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",14,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",20,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",2,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",11,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",17,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",8,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",23,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565
App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity
"app-20210319163812-1778","",0,"Register input tables",2,2
"app-20210319163812-1778","",21,"Register input tables",1,1
"app-20210319163812-1778","",5,"Register input tables",1,1
"app-20210319163812-1778","",6,"Register input tables",0,0
"app-20210319163812-1778","",15,"Register input tables",0,0
"app-20210319163812-1778","",3,"Register input tables",0,0
"app-20210319163812-1778","",12,"Register input tables",0,0
"app-20210319163812-1778","",18,"Register input tables",0,0
"app-20210319163812-1778","",9,"Register input tables",0,0
"app-20210319163812-1778","",19,"Register input tables",0,0
"app-20210319163812-1778","",1,"Register input tables",0,0
"app-20210319163812-1778","",10,"Register input tables",0,0
"app-20210319163812-1778","",16,"Register input tables",0,0
"app-20210319163812-1778","",7,"Register input tables",0,0
"app-20210319163812-1778","",22,"Register input tables",0,0
"app-20210319163812-1778","",13,"Register input tables",0,0
"app-20210319163812-1778","",4,"Register input tables",0,0
"app-20210319163812-1778","",14,"Register input tables",0,0
"app-20210319163812-1778","",20,"Register input tables",0,0
"app-20210319163812-1778","",2,"Register input tables",0,0
"app-20210319163812-1778","",11,"Register input tables",0,0
"app-20210319163812-1778","",17,"Register input tables",0,0
"app-20210319163812-1778","",8,"Register input tables",0,0
"app-20210319163812-1778","",23,"Register input tables",0,0
"app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
App Name,App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity
"TPC-DS Like Bench q86","app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565
"TPC-DS Like Bench q86","app-20210319163812-1778","",0,"Register input tables",2,2
"TPC-DS Like Bench q86","app-20210319163812-1778","",21,"Register input tables",1,1
"TPC-DS Like Bench q86","app-20210319163812-1778","",5,"Register input tables",1,1
"TPC-DS Like Bench q86","app-20210319163812-1778","",6,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",15,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",3,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",12,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",18,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",9,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",19,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",1,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",10,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",16,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",7,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",22,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",13,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",4,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",14,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",20,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",2,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",11,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",17,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",8,"Register input tables",0,0
"TPC-DS Like Bench q86","app-20210319163812-1778","",23,"Register input tables",0,0
App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity
"app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565
"app-20210319163812-1778","",0,"Register input tables",2,2
"app-20210319163812-1778","",21,"Register input tables",1,1
"app-20210319163812-1778","",5,"Register input tables",1,1
"app-20210319163812-1778","",6,"Register input tables",0,0
"app-20210319163812-1778","",15,"Register input tables",0,0
"app-20210319163812-1778","",3,"Register input tables",0,0
"app-20210319163812-1778","",12,"Register input tables",0,0
"app-20210319163812-1778","",18,"Register input tables",0,0
"app-20210319163812-1778","",9,"Register input tables",0,0
"app-20210319163812-1778","",19,"Register input tables",0,0
"app-20210319163812-1778","",1,"Register input tables",0,0
"app-20210319163812-1778","",10,"Register input tables",0,0
"app-20210319163812-1778","",16,"Register input tables",0,0
"app-20210319163812-1778","",7,"Register input tables",0,0
"app-20210319163812-1778","",22,"Register input tables",0,0
"app-20210319163812-1778","",13,"Register input tables",0,0
"app-20210319163812-1778","",4,"Register input tables",0,0
"app-20210319163812-1778","",14,"Register input tables",0,0
"app-20210319163812-1778","",20,"Register input tables",0,0
"app-20210319163812-1778","",2,"Register input tables",0,0
"app-20210319163812-1778","",11,"Register input tables",0,0
"app-20210319163812-1778","",17,"Register input tables",0,0
"app-20210319163812-1778","",8,"Register input tables",0,0
"app-20210319163812-1778","",23,"Register input tables",0,0
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
App Name,App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",1,"count at QualificationInfoUtils.scala:94",7143,6719
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",3,"count at QualificationInfoUtils.scala:94",2052,1660
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",2,"count at QualificationInfoUtils.scala:94",1933,1551
"Spark shell","local-1651187225439","",0,"show at <console>:26",498,333
"Spark shell","local-1651188809790","",0,"show at <console>:26",715,242
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",0,"json at QualificationInfoUtils.scala:76",1306,164
"Spark shell","local-1651188809790","",1,"show at <console>:26",196,135
"Spark shell","local-1651187225439","",1,"show at <console>:26",262,110
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",2,"json at QualificationInfoUtils.scala:136",321,107
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",5,"json at QualificationInfoUtils.scala:136",129,43
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",8,"json at QualificationInfoUtils.scala:136",127,42
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",4,"createOrReplaceTempView at QualificationInfoUtils.scala:133",22,22
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",7,"createOrReplaceTempView at QualificationInfoUtils.scala:133",4,4
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",1,"createOrReplaceTempView at QualificationInfoUtils.scala:133",2,2
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",0,"json at QualificationInfoUtils.scala:130",1209,0
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",6,"json at QualificationInfoUtils.scala:130",110,0
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",3,"json at QualificationInfoUtils.scala:130",108,0
App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity
"local-1622043423018","",1,"count at QualificationInfoUtils.scala:94",7143,6719
"local-1622043423018","",3,"count at QualificationInfoUtils.scala:94",2052,1660
"local-1622043423018","",2,"count at QualificationInfoUtils.scala:94",1933,1551
"local-1651187225439","",0,"show at <console>:26",498,333
"local-1651188809790","",0,"show at <console>:26",715,242
"local-1622043423018","",0,"json at QualificationInfoUtils.scala:76",1306,164
"local-1651188809790","",1,"show at <console>:26",196,135
"local-1651187225439","",1,"show at <console>:26",262,110
"local-1623281204390","",2,"json at QualificationInfoUtils.scala:136",321,107
"local-1623281204390","",5,"json at QualificationInfoUtils.scala:136",129,43
"local-1623281204390","",8,"json at QualificationInfoUtils.scala:136",127,42
"local-1623281204390","",4,"createOrReplaceTempView at QualificationInfoUtils.scala:133",22,22
"local-1623281204390","",7,"createOrReplaceTempView at QualificationInfoUtils.scala:133",4,4
"local-1623281204390","",1,"createOrReplaceTempView at QualificationInfoUtils.scala:133",2,2
"local-1623281204390","",0,"json at QualificationInfoUtils.scala:130",1209,0
"local-1623281204390","",6,"json at QualificationInfoUtils.scala:130",110,0
"local-1623281204390","",3,"json at QualificationInfoUtils.scala:130",108,0
Loading

0 comments on commit 38aad12

Please sign in to comment.