From 38aad1220a0b4b95ff0392e742c45b79d59e4cf5 Mon Sep 17 00:00:00 2001 From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com> Date: Mon, 10 Feb 2025 10:07:00 -0600 Subject: [PATCH] Disable Per-SQL summary text output (#1530) Signed-off-by: Ahmed Hussein (amahussein) Fixes #1527 Disable the text format output generated per-sql. The target is to reduce the noise of the stdout and improve the performance of the core-tools ### Impact on the output: - `rapids_4_spark_qualification_output_persql.log` is not generated anymore by the qualTool - remove column `AppName` from `rapids_4_spark_qualification_output_persql.csv` - the `rapids_4_spark_qualification_output_persql.log` can still be generated by the RunningQualificationApp - The order of the SQLs in the CSV file has changed. Sorted Desc based on (GPU opportunity, and DF duration) within each app. Previously, the SQLs were sorted globally which might cause considerable overhead for a large number of event logs. ### Impact on Performance and usability: - Improve readability of the stdout/log generated by the tools. - Reduce the size of lines consumed by the python wrapper. - Sorting SQLs per-app implies less memory requirements since it is only required to maintain the list of SQL for the current iteration. - Improve the string construction by avoiding filling `Buffer` - Improve the performance by skipping generating and writing the log file to the disk. 
--- .../tool/qualification/QualOutputWriter.scala | 98 +++++++++++++++---- .../tool/qualification/Qualification.scala | 1 - .../RunningQualOutputWriter.scala | 5 +- .../RunningQualificationApp.scala | 11 +-- .../nds_q86_fail_test_expectation_persql.csv | 52 +++++----- .../nds_q86_test_expectation_persql.csv | 52 +++++----- .../qual_test_simple_expectation_persql.csv | 36 +++---- .../qualification/QualificationSuite.scala | 40 ++++---- 8 files changed, 172 insertions(+), 123 deletions(-) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala index 484a9fa6a..852002ae1 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/QualOutputWriter.scala @@ -191,25 +191,65 @@ class QualOutputWriter(outputDir: String, reportReadSchema: Boolean, } } + /** + * Write the per SQL CSV report. This is used by the QualificationApp to write CSV report for the + * SQLs in the application. + * The SQLs within each app are sorted by GPU opportunity and SQL DF Duration in + * descending order. + * Note that the caller takes the responsibility of sorting the App summaries, which determines + * the order of each AppID in the report. + * + * The implementation is optimized as follows: + * - reduces object allocations by avoiding the use of ListBuffer(string, int). + * - uses raw interpolation which is a faster way of concatenating strings. + * - does not sort all the SQLs of the applications. Instead, sort SQLs locally per app which + * reduces the size of the sorting data. + * + * @param sums list of QualificationSummaryInfo + * @param maxSQLDescLength the maximum length allowed in the SQL description field. 
+ */ def writePerSqlCSVReport(sums: Seq[QualificationSummaryInfo], maxSQLDescLength: Int): Unit = { + val delimiter = QualOutputWriter.CSV_DELIMITER + val emptyString = StringUtils.reformatCSVString("") + + def constructRowFromPerSqlSummary( + appID: String, sumInfo: EstimatedPerSQLSummaryInfo): String = { + val rootID = sumInfo.rootExecutionID match { + case Some(id) => StringUtils.reformatCSVString(id.toString) + case _ => emptyString + } + val sqlDescr = + StringUtils.reformatCSVString( + QualOutputWriter.formatSQLDescription(sumInfo.sqlDesc, maxSQLDescLength, delimiter)) + // Use raw interpolation which has better performance compared to sInterpolation because it + // does not process escaped characters. + raw"$appID$delimiter$rootID$delimiter${sumInfo.sqlID}$delimiter$sqlDescr$delimiter" + + raw"${sumInfo.info.sqlDfDuration}$delimiter${sumInfo.info.gpuOpportunity}" + } + val csvFileWriter = new ToolTextFileWriter(outputDir, s"${QualOutputWriter.LOGFILE_NAME}_persql.csv", "Per SQL CSV Report", hadoopConf) try { - val appNameSize = QualOutputWriter.getAppNameSize(sums) - val appIdSize = QualOutputWriter.getAppIdSize(sums) - val sqlDescSize = - QualOutputWriter.getSqlDescSize(sums, maxSQLDescLength, QualOutputWriter.CSV_DELIMITER) - val headersAndSizes = - QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize, appIdSize, sqlDescSize) - csvFileWriter.write(QualOutputWriter.constructDetailedHeader(headersAndSizes, - QualOutputWriter.CSV_DELIMITER, false)) - val appIdMaxSize = QualOutputWriter.getAppIdSize(sums) - val sortedInfo = sortPerSqlInfo(sums) - sortedInfo.foreach { sumInfo => - val row = QualOutputWriter.constructPerSqlSummaryInfo(sumInfo, headersAndSizes, - appIdMaxSize, ",", false, maxSQLDescLength) - csvFileWriter.write(row) + csvFileWriter.write( + QualOutputWriter.constructOutputRowFromMap(QualOutputWriter.getPerSqlHeaderStrings, + QualOutputWriter.CSV_DELIMITER)) + // Write the perSQL info for each app. 
+ sums.foreach { sum => + sum.perSQLEstimatedInfo match { + case Some(perSqlArr) => + if (perSqlArr.nonEmpty) { + val appIDStr = StringUtils.reformatCSVString(sum.appId) + perSqlArr.sortBy(perSql => { + (-perSql.info.gpuOpportunity, -perSql.info.sqlDfDuration) + }).foreach { sqlInfo => + csvFileWriter.write(constructRowFromPerSqlSummary(appIDStr, sqlInfo)) + // add new line separately to avoid processing escape characters. + csvFileWriter.write("\n") + } + } + case _ => // Do nothing + } } } finally { csvFileWriter.close() @@ -228,15 +268,15 @@ class QualOutputWriter(outputDir: String, reportReadSchema: Boolean, sortedAsc.reverse } } + private def writePerSqlTextSummary(writer: ToolTextFileWriter, sums: Seq[QualificationSummaryInfo], numOutputRows: Int, maxSQLDescLength: Int): Unit = { - val appNameSize = QualOutputWriter.getAppNameSize(sums) val appIdSize = QualOutputWriter.getAppIdSize(sums) val sqlDescSize = QualOutputWriter.getSqlDescSize(sums, maxSQLDescLength, QualOutputWriter.TEXT_DELIMITER) val headersAndSizes = - QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize, appIdSize, sqlDescSize) + QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appIdSize, sqlDescSize) val entireHeader = QualOutputWriter.constructOutputRowFromMap(headersAndSizes, TEXT_DELIMITER, true) val sep = "=" * (entireHeader.size - 1) @@ -783,12 +823,13 @@ object QualOutputWriter { QualOutputWriter.constructOutputRowFromMap(headersAndSizes, delimiter, prettyPrint) } + /** + * Constructs the detailed per-SQL header strings and sizes. 
This is called by the RunningQualWriter + */ def getDetailedPerSqlHeaderStringsAndSizes( - appMaxNameSize: Int, appMaxIdSize: Int, sqlDescLength: Int): LinkedHashMap[String, Int] = { val detailedHeadersAndFields = LinkedHashMap[String, Int]( - APP_NAME_STR -> appMaxNameSize, APP_ID_STR -> appMaxIdSize, ROOT_SQL_ID_STR -> ROOT_SQL_ID_STR.size, SQL_ID_STR -> SQL_ID_STR.size, @@ -807,6 +848,10 @@ object QualOutputWriter { replaceDelimiter(escapedMetaStr, delimiter) } + /** + * Constructs a row from the PerSQL Summary Info. + * This method is used by the RunningQualificationApp + */ def constructPerSqlSummaryInfo( sumInfo: EstimatedPerSQLSummaryInfo, headersAndSizes: LinkedHashMap[String, Int], @@ -818,7 +863,6 @@ object QualOutputWriter { val reformatCSVFunc : String => String = if (reformatCSV) str => StringUtils.reformatCSVString(str) else str => str val data = ListBuffer[(String, Int)]( - reformatCSVFunc(sumInfo.info.appName) -> headersAndSizes(APP_NAME_STR), reformatCSVFunc(sumInfo.info.appId) -> appIdMaxSize, reformatCSVFunc(sumInfo.rootExecutionID.getOrElse("").toString)-> ROOT_SQL_ID_STR.size, sumInfo.sqlID.toString -> SQL_ID_STR.size, @@ -848,6 +892,22 @@ object QualOutputWriter { detailedHeadersAndFields } + /** + * Construct the headers for the PerSql Summary (csv file). 
+ * rapids_4_spark_qualification_output_persql.csv + * @return LinkedHashMap[String, Int] + */ + private def getPerSqlHeaderStrings: LinkedHashMap[String, Int] = { + val detailedHeadersAndFields = LinkedHashMap[String, Int]( + APP_ID_STR -> APP_ID_STR.size, + ROOT_SQL_ID_STR -> ROOT_SQL_ID_STR.size, + SQL_ID_STR -> SQL_ID_STR.size, + SQL_DESC_STR -> SQL_DESC_STR.size, + SQL_DUR_STR -> SQL_DUR_STR_SIZE, + GPU_OPPORTUNITY_STR -> GPU_OPPORTUNITY_STR_SIZE) + detailedHeadersAndFields + } + private def getClusterInfoHeaderStrings: mutable.LinkedHashMap[String, Int] = { val headersAndFields = Seq( APP_ID_STR, APP_NAME_STR, STATUS_REPORT_PATH_STR, VENDOR, DRIVER_HOST, CLUSTER_ID_STR, diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala index 8962974ea..6840b1973 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala @@ -248,7 +248,6 @@ class Qualification(outputPath: String, numRows: Int, hadoopConf: Configuration, sortForExecutiveSummary(sortedDescDetailed, order), numRows) qWriter.writeDetailedCSVReport(sortedDescDetailed) if (reportSqlLevel) { - qWriter.writePerSqlTextReport(allAppsSum, numRows, maxSQLDescLength) qWriter.writePerSqlCSVReport(allAppsSum, maxSQLDescLength) } qWriter.writeExecReport(allAppsSum) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualOutputWriter.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualOutputWriter.scala index 4e4e6efcf..bb4ed60c9 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualOutputWriter.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualOutputWriter.scala @@ -51,9 +51,8 @@ class RunningQualOutputWriter( // we don't know max length since 
process per query, hardcode for 100 for now private val SQL_DESC_LENGTH = 100 - private val appNameSize = if (appName.nonEmpty) appName.size else 100 - val headersAndSizes = QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize, - appId.size, SQL_DESC_LENGTH) + val headersAndSizes = QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appId.size, + SQL_DESC_LENGTH) val entireTextHeader = QualOutputWriter.constructOutputRowFromMap(headersAndSizes, TEXT_DELIMITER, true) private val sep = "=" * (entireTextHeader.size - 1) diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualificationApp.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualificationApp.scala index c47ce84aa..1f5c5516b 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualificationApp.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/qualification/RunningQualificationApp.scala @@ -81,18 +81,9 @@ class RunningQualificationApp( // we don't know the max sql query name size so lets cap it at 100 private val SQL_DESC_LENGTH = 100 - private lazy val appNameSize = { - val runningAppName = getAppName - if (runningAppName.nonEmpty) { - runningAppName.size - } else { - 100 - } - } private lazy val perSqlHeadersAndSizes = { - QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize, - appId.size, SQL_DESC_LENGTH) + QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appId.size, SQL_DESC_LENGTH) } def this() = { diff --git a/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation_persql.csv b/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation_persql.csv index 49a4d7e54..2d0a173e4 100644 --- a/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation_persql.csv +++ b/core/src/test/resources/QualificationExpectations/nds_q86_fail_test_expectation_persql.csv @@ -1,26 +1,26 @@ -App Name,App ID,Root SQL ID,SQL 
ID,SQL Description,SQL DF Duration,GPU Opportunity -"TPC-DS Like Bench q86","app-20210319163812-1778","",0,"Register input tables",2,2 -"TPC-DS Like Bench q86","app-20210319163812-1778","",21,"Register input tables",1,1 -"TPC-DS Like Bench q86","app-20210319163812-1778","",5,"Register input tables",1,1 -"TPC-DS Like Bench q86","app-20210319163812-1778","",6,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",15,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",3,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",12,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",18,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",9,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",19,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",1,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",10,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",16,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",7,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",22,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",13,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",4,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",14,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",20,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",2,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",11,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",17,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",8,"Register input tables",0,0 
-"TPC-DS Like Bench q86","app-20210319163812-1778","",23,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565 +App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity +"app-20210319163812-1778","",0,"Register input tables",2,2 +"app-20210319163812-1778","",21,"Register input tables",1,1 +"app-20210319163812-1778","",5,"Register input tables",1,1 +"app-20210319163812-1778","",6,"Register input tables",0,0 +"app-20210319163812-1778","",15,"Register input tables",0,0 +"app-20210319163812-1778","",3,"Register input tables",0,0 +"app-20210319163812-1778","",12,"Register input tables",0,0 +"app-20210319163812-1778","",18,"Register input tables",0,0 +"app-20210319163812-1778","",9,"Register input tables",0,0 +"app-20210319163812-1778","",19,"Register input tables",0,0 +"app-20210319163812-1778","",1,"Register input tables",0,0 +"app-20210319163812-1778","",10,"Register input tables",0,0 +"app-20210319163812-1778","",16,"Register input tables",0,0 +"app-20210319163812-1778","",7,"Register input tables",0,0 +"app-20210319163812-1778","",22,"Register input tables",0,0 +"app-20210319163812-1778","",13,"Register input tables",0,0 +"app-20210319163812-1778","",4,"Register input tables",0,0 +"app-20210319163812-1778","",14,"Register input tables",0,0 +"app-20210319163812-1778","",20,"Register input tables",0,0 +"app-20210319163812-1778","",2,"Register input tables",0,0 +"app-20210319163812-1778","",11,"Register input tables",0,0 +"app-20210319163812-1778","",17,"Register input tables",0,0 +"app-20210319163812-1778","",8,"Register input tables",0,0 +"app-20210319163812-1778","",23,"Register input tables",0,0 +"app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565 diff --git a/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation_persql.csv b/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation_persql.csv index 
15d1ee5eb..baf6d7a66 100644 --- a/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation_persql.csv +++ b/core/src/test/resources/QualificationExpectations/nds_q86_test_expectation_persql.csv @@ -1,26 +1,26 @@ -App Name,App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity -"TPC-DS Like Bench q86","app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565 -"TPC-DS Like Bench q86","app-20210319163812-1778","",0,"Register input tables",2,2 -"TPC-DS Like Bench q86","app-20210319163812-1778","",21,"Register input tables",1,1 -"TPC-DS Like Bench q86","app-20210319163812-1778","",5,"Register input tables",1,1 -"TPC-DS Like Bench q86","app-20210319163812-1778","",6,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",15,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",3,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",12,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",18,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",9,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",19,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",1,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",10,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",16,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",7,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",22,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",13,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",4,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",14,"Register input tables",0,0 -"TPC-DS Like Bench 
q86","app-20210319163812-1778","",20,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",2,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",11,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",17,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",8,"Register input tables",0,0 -"TPC-DS Like Bench q86","app-20210319163812-1778","",23,"Register input tables",0,0 +App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity +"app-20210319163812-1778","",24,"Benchmark Run: query=q86; iteration=0",9565,9565 +"app-20210319163812-1778","",0,"Register input tables",2,2 +"app-20210319163812-1778","",21,"Register input tables",1,1 +"app-20210319163812-1778","",5,"Register input tables",1,1 +"app-20210319163812-1778","",6,"Register input tables",0,0 +"app-20210319163812-1778","",15,"Register input tables",0,0 +"app-20210319163812-1778","",3,"Register input tables",0,0 +"app-20210319163812-1778","",12,"Register input tables",0,0 +"app-20210319163812-1778","",18,"Register input tables",0,0 +"app-20210319163812-1778","",9,"Register input tables",0,0 +"app-20210319163812-1778","",19,"Register input tables",0,0 +"app-20210319163812-1778","",1,"Register input tables",0,0 +"app-20210319163812-1778","",10,"Register input tables",0,0 +"app-20210319163812-1778","",16,"Register input tables",0,0 +"app-20210319163812-1778","",7,"Register input tables",0,0 +"app-20210319163812-1778","",22,"Register input tables",0,0 +"app-20210319163812-1778","",13,"Register input tables",0,0 +"app-20210319163812-1778","",4,"Register input tables",0,0 +"app-20210319163812-1778","",14,"Register input tables",0,0 +"app-20210319163812-1778","",20,"Register input tables",0,0 +"app-20210319163812-1778","",2,"Register input tables",0,0 +"app-20210319163812-1778","",11,"Register input tables",0,0 +"app-20210319163812-1778","",17,"Register input tables",0,0 
+"app-20210319163812-1778","",8,"Register input tables",0,0 +"app-20210319163812-1778","",23,"Register input tables",0,0 diff --git a/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation_persql.csv b/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation_persql.csv index 7d29bc48c..fd545ccea 100644 --- a/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation_persql.csv +++ b/core/src/test/resources/QualificationExpectations/qual_test_simple_expectation_persql.csv @@ -1,18 +1,18 @@ -App Name,App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity -"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",1,"count at QualificationInfoUtils.scala:94",7143,6719 -"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",3,"count at QualificationInfoUtils.scala:94",2052,1660 -"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",2,"count at QualificationInfoUtils.scala:94",1933,1551 -"Spark shell","local-1651187225439","",0,"show at :26",498,333 -"Spark shell","local-1651188809790","",0,"show at :26",715,242 -"Rapids Spark Profiling Tool Unit Tests","local-1622043423018","",0,"json at QualificationInfoUtils.scala:76",1306,164 -"Spark shell","local-1651188809790","",1,"show at :26",196,135 -"Spark shell","local-1651187225439","",1,"show at :26",262,110 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",2,"json at QualificationInfoUtils.scala:136",321,107 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",5,"json at QualificationInfoUtils.scala:136",129,43 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",8,"json at QualificationInfoUtils.scala:136",127,42 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",4,"createOrReplaceTempView at QualificationInfoUtils.scala:133",22,22 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",7,"createOrReplaceTempView at 
QualificationInfoUtils.scala:133",4,4 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",1,"createOrReplaceTempView at QualificationInfoUtils.scala:133",2,2 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",0,"json at QualificationInfoUtils.scala:130",1209,0 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",6,"json at QualificationInfoUtils.scala:130",110,0 -"Rapids Spark Profiling Tool Unit Tests","local-1623281204390","",3,"json at QualificationInfoUtils.scala:130",108,0 +App ID,Root SQL ID,SQL ID,SQL Description,SQL DF Duration,GPU Opportunity +"local-1622043423018","",1,"count at QualificationInfoUtils.scala:94",7143,6719 +"local-1622043423018","",3,"count at QualificationInfoUtils.scala:94",2052,1660 +"local-1622043423018","",2,"count at QualificationInfoUtils.scala:94",1933,1551 +"local-1651187225439","",0,"show at :26",498,333 +"local-1651188809790","",0,"show at :26",715,242 +"local-1622043423018","",0,"json at QualificationInfoUtils.scala:76",1306,164 +"local-1651188809790","",1,"show at :26",196,135 +"local-1651187225439","",1,"show at :26",262,110 +"local-1623281204390","",2,"json at QualificationInfoUtils.scala:136",321,107 +"local-1623281204390","",5,"json at QualificationInfoUtils.scala:136",129,43 +"local-1623281204390","",8,"json at QualificationInfoUtils.scala:136",127,42 +"local-1623281204390","",4,"createOrReplaceTempView at QualificationInfoUtils.scala:133",22,22 +"local-1623281204390","",7,"createOrReplaceTempView at QualificationInfoUtils.scala:133",4,4 +"local-1623281204390","",1,"createOrReplaceTempView at QualificationInfoUtils.scala:133",2,2 +"local-1623281204390","",0,"json at QualificationInfoUtils.scala:130",1209,0 +"local-1623281204390","",6,"json at QualificationInfoUtils.scala:130",110,0 +"local-1623281204390","",3,"json at QualificationInfoUtils.scala:130",108,0 diff --git a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala 
b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala index e3ca550d5..909ab54df 100644 --- a/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala +++ b/core/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala @@ -95,7 +95,6 @@ class QualificationSuite extends BaseTestSuite { (QualOutputWriter.TOTAL_CORE_SEC, LongType)) private val csvPerSQLFields = Seq( - (QualOutputWriter.APP_NAME_STR, StringType), (QualOutputWriter.APP_ID_STR, StringType), (QualOutputWriter.ROOT_SQL_ID_STR, StringType), (QualOutputWriter.SQL_ID_STR, StringType), @@ -226,12 +225,12 @@ class QualificationSuite extends BaseTestSuite { val allFiles = fs.listStatus(outputDirPath) assert(allFiles.size == 6) val dfPerSqlActual = readPerSqlFile(new File(csvOutput0)) - assert(dfPerSqlActual.columns.size == 7) + assert(dfPerSqlActual.columns.size == 6) val rows = dfPerSqlActual.collect() assert(rows.size == 2) val firstRow = rows(1) // , should be replaced with ; - assert(firstRow(4).toString.contains("at QualificationSuite.scala")) + assert(firstRow(3).toString.contains("at QualificationSuite.scala")) // this reads everything into single column val dfPerSqlActualTxt = readPerSqlTextFile(new File(txtOutput0)) @@ -314,8 +313,8 @@ class QualificationSuite extends BaseTestSuite { val inputSource = UTF8Source.fromFile(filename) try { val lines = inputSource.getLines.toArray - // 4 lines of header and footer - assert(lines.size == (4 + 4)) + // 4 lines of headers and footers + assert(lines.size == 4 + 4) // skip the 3 header lines val firstRow = lines(3) assert(firstRow.contains("local-1622043423018")) @@ -323,12 +322,12 @@ class QualificationSuite extends BaseTestSuite { inputSource.close() } val persqlFileName = s"$outpath/rapids_4_spark_qualification_output/" + - s"rapids_4_spark_qualification_output_persql.log" + s"rapids_4_spark_qualification_output_persql.csv" val persqlInputSource = 
UTF8Source.fromFile(persqlFileName) try { val lines = persqlInputSource.getLines.toArray // 4 lines of header and footer - assert(lines.size == (4 + 17)) + assert(lines.size == (1 + 17)) // skip the 3 header lines val firstRow = lines(3) // this should be app @@ -372,12 +371,12 @@ class QualificationSuite extends BaseTestSuite { inputSource.close() } val persqlFileName = s"$outpath/rapids_4_spark_qualification_output/" + - s"rapids_4_spark_qualification_output_persql.log" + s"rapids_4_spark_qualification_output_persql.csv" val persqlInputSource = UTF8Source.fromFile(persqlFileName) try { val lines = persqlInputSource.getLines - // 4 lines of header and footer, limit is 2 - assert(lines.size == (4 + 2)) + // 1 lines of header, limit is has not impact on CSV file + assert(lines.size == (1 + 17)) } finally { persqlInputSource.close() } @@ -1031,12 +1030,12 @@ class QualificationSuite extends BaseTestSuite { val dfPerSqlActual = readPerSqlFile(new File(persqlResults)) // the number of columns actually won't be wrong if sql description is malformatted // because spark seems to drop extra column so need more checking - assert(dfPerSqlActual.columns.size == 7) + assert(dfPerSqlActual.columns.size == 6) val rows = dfPerSqlActual.collect() assert(rows.size == 3) val firstRow = rows(1) // , should not be replaced with ; or any other delim - assert(firstRow(4) == "testing, csv delimiter, replacement") + assert(firstRow(3) == "testing, csv delimiter, replacement") // parse results from listener val executorCpuTime = NANOSECONDS.toMillis(listener.executorCpuTime) // in milliseconds @@ -1259,26 +1258,27 @@ class QualificationSuite extends BaseTestSuite { } // just basic testing that line exists and has right separator val csvHeader = qualApp.getPerSqlCSVHeader - assert(csvHeader.contains("App Name,App ID,Root SQL ID,SQL ID,SQL Description," + + assert(csvHeader.contains("App ID,Root SQL ID,SQL ID,SQL Description," + "SQL DF Duration,GPU Opportunity")) val txtHeader = 
qualApp.getPerSqlTextHeader - assert(txtHeader.contains("| App Name| App ID|" + + assert(txtHeader.contains( + "| App ID|" + "Root SQL ID|SQL ID| " + " SQL Description|SQL DF Duration|GPU Opportunity|")) val randHeader = qualApp.getPerSqlHeader(";", true, 20) - assert(randHeader.contains("; App Name; App ID;" + + assert(randHeader.contains("; App ID;" + "Root SQL ID;SQL ID; SQL Description;SQL DF Duration;GPU Opportunity;")) val allSQLIds = qualApp.getAvailableSqlIDs val numSQLIds = allSQLIds.size assert(numSQLIds > 0) val sqlIdToLookup = allSQLIds.head val (csvOut, txtOut) = qualApp.getPerSqlTextAndCSVSummary(sqlIdToLookup) - assert(csvOut.contains("Profiling Tool Unit Tests") && csvOut.contains(","), + assert(csvOut.contains("collect at ToolTestUtils.scala:67") && csvOut.contains(","), s"CSV output was: $csvOut") - assert(txtOut.contains("Profiling Tool Unit Tests") && txtOut.contains("|"), + assert(txtOut.contains("collect at ToolTestUtils.scala:67") && txtOut.contains("|"), s"TXT output was: $txtOut") val sqlOut = qualApp.getPerSQLSummary(sqlIdToLookup, ":", true, 5) - assert(sqlOut.contains("Tool Unit Tests:"), s"SQL output was: $sqlOut") + assert(sqlOut.contains("colle:"), s"SQL output was: $sqlOut") // test different delimiter val sumOut = qualApp.getSummary(":", false) @@ -1485,7 +1485,7 @@ class QualificationSuite extends BaseTestSuite { ToolTestUtils.generateEventLog(eventLogDir, jobName) { spark => import spark.implicits._ val testData = Seq((1), (2)).toDF("id") - spark.sparkContext.setJobDescription("run job with problematic name") + spark.sparkContext.setJobDescription(s"run job with problematic name ($jobName)") testData.createOrReplaceTempView("t1") spark.sql("SELECT id FROM t1") } @@ -1515,7 +1515,7 @@ class QualificationSuite extends BaseTestSuite { s"rapids_4_spark_qualification_output_persql.csv" val outputPerSqlActual = readPerSqlFile(new File(persqlResults), "\"") val rows = outputPerSqlActual.collect() - assert(rows(1)(0).toString == jobName) 
+ assert(rows(1)(3).toString == s"run job with problematic name ($jobName)") } } }