Skip to content

Commit

Permalink
Review comment changes + test case
Browse files Browse the repository at this point in the history
Signed-off-by: Sayed Bilal Bari <sbari@nvidia.com>
  • Loading branch information
sayedbilalbari committed Feb 26, 2025
1 parent 854eaef commit 8e7586c
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,8 @@ object DataWritingCommandExecParser {
}

/**
 * Helper function to extract the file format from class object strings
 * like "com.nvidia.spark.rapids.GpuParquetFileFormat@9f5022c".
 * Currently the RAPIDS plugin dumps the raw object name instead
 * of the pretty file format.
 * Refer: https://github.com/NVIDIA/spark-rapids-tools/issues/1561
 *
 * @param formatStr raw class object string from the plan description
 * @return the extracted file format (e.g. "Parquet"), or the input
 *         unchanged when it does not match the expected pattern
 */
def extractFormatName(formatStr: String): String = {
  // Extracts the pretty file format from the raw class object string.
  // Regex breakdown:
  //  1. `.*\.`        - consumes the package prefix up to the last dot
  //  2. `Gpu`         - literal prefix of the RAPIDS file-format classes
  //  3. `([a-zA-Z]+)` - captures the file format name (e.g. "Parquet")
  //  4. `FileFormat`  - literal class-name suffix
  //  5. `(?:@.*)?`    - optionally consumes the object hash suffix ("@9f5022c");
  //                     non-capturing since the hash is never used
  val formatRegex = """.*\.Gpu([a-zA-Z]+)FileFormat(?:@.*)?""".r
  formatStr match {
    case formatRegex(fileFormat) => fileFormat
    case _ => formatStr // Return original if no match
  }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,39 @@ class WriteOperationParserSuite extends FunSuite {
)
}

test("getWriteOpMetaFromNode - Gpu logs profiler case") {
  // Raw class object strings as dumped by the RAPIDS plugin, paired with
  // the pretty file-format name expected after parsing.
  val rawFormatToExpected = Seq(
    "com.nvidia.spark.rapids.GpuParquetFileFormat@9f5022c" -> "Parquet",
    "com.nvidia.spark.rapids.GpuOrcFileFormat@123abc" -> "Orc",
    "com.nvidia.spark.rapids.GpuHiveTextFileFormat@123abc" -> "HiveText",
    "com.nvidia.spark.rapids.GpuHiveParquetFileFormat@123abc" -> "HiveParquet",
    "com.nvidia.spark.rapids.GpuDeltaFileFormat@123abc" -> "Delta"
  )
  rawFormatToExpected.foreach { case (rawFormat, prettyFormat) =>
    // Build an InsertIntoHadoopFsRelationCommand node whose description
    // embeds the raw class object string where a pretty format would be.
    val planNode = new SparkPlanGraphNode(
      id = 1,
      name = "Execute InsertIntoHadoopFsRelationCommand",
      desc = "Execute InsertIntoHadoopFsRelationCommand gs://path/to/database/table1, " +
        s"false, $rawFormat, " +
        "[serialization.format=1, mergeschema=false, __hive_compatible_bucketed_table_insertion__=true], " +
        "Append, `spark_catalog`.`database`.`table`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe, " +
        "org.apache.spark.sql.execution.datasources.InMemoryFileIndex(gs://path/to/database/table1), " +
        "[col01, col02, col03]",
      Seq.empty
    )
    testGetWriteOpMetaFromNode(
      planNode,
      expectedExecName = "InsertIntoHadoopFsRelationCommand",
      expectedDataFormat = prettyFormat,
      expectedOutputPath = "gs://path/to/database/table1",
      expectedOutputColumns = "col01;col02;col03",
      expectedWriteMode = "Append",
      expectedTableName = "table1",
      expectedDatabaseName = "database"
    )
  }
}

test("AppendDataExecV1 - delta format") {
val node = new SparkPlanGraphNode(
id = 3,
Expand Down

0 comments on commit 8e7586c

Please sign in to comment.