Skip to content

Commit

Permalink
Merge dev into main
Browse files · Browse the repository at this point in the history
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
  • Loading branch information
nvauto committed Apr 24, 2024
2 parents c1aa05d + 79697ad commit 99c1651
Show file tree
Hide file tree
Showing 84 changed files with 3,039 additions and 894 deletions.
4 changes: 2 additions & 2 deletions core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ The Profiling tool generates information which can be used for debugging and pro
Information such as Spark version, executor information, properties and so on. This runs on either CPU or
GPU generated event logs.

Please refer to [Qualification tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html)
and [Profiling tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html)
Please refer to [Qualification tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/qualification/overview.html)
and [Profiling tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/profiling/overview.html)
for more details on how to use the tools.

## Build
Expand Down
19 changes: 9 additions & 10 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<artifactId>rapids-4-spark-tools_2.12</artifactId>
<name>RAPIDS Accelerator for Apache Spark tools</name>
<description>RAPIDS Accelerator for Apache Spark tools</description>
<version>24.02.2</version>
<version>24.02.3-SNAPSHOT</version>
<packaging>jar</packaging>
<url>http://github.com/NVIDIA/spark-rapids-tools</url>

Expand Down Expand Up @@ -382,6 +382,7 @@
<properties>
<buildver>350</buildver>
<spark.version>${spark350.version}</spark.version>
<delta.core.artifactory>${delta.core.artifactory.post35}</delta.core.artifactory>
<delta.core.version>${delta31x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
Expand All @@ -397,6 +398,7 @@
<properties>
<buildver>351</buildver>
<spark.version>${spark351.version}</spark.version>
<delta.core.artifactory>${delta.core.artifactory.post35}</delta.core.artifactory>
<delta.core.version>${delta31x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
Expand All @@ -412,6 +414,7 @@
<properties>
<buildver>352</buildver>
<spark.version>${spark352.version}</spark.version>
<delta.core.artifactory>${delta.core.artifactory.post35}</delta.core.artifactory>
<delta.core.version>${delta31x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
Expand All @@ -427,6 +430,7 @@
<properties>
<buildver>400</buildver>
<spark.version>${spark400.version}</spark.version>
<delta.core.artifactory>${delta.core.artifactory.post35}</delta.core.artifactory>
<delta.core.version>${delta31x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
Expand Down Expand Up @@ -487,10 +491,10 @@
<delta22x.version>2.2.0</delta22x.version>
<delta23x.version>2.3.0</delta23x.version>
<delta24x.version>2.4.0</delta24x.version>
<!-- TODO: Fix delta version should be 3.1.0 but this requires scala-binary 2.13 which we
don't have yet -->
<delta31x.version>2.4.0</delta31x.version>
<delta31x.version>3.1.0</delta31x.version>
<delta.core.version>${delta24x.version}</delta.core.version>
<delta.core.artifactory>delta-core_${scala.binary.version}</delta.core.artifactory>
<delta.core.artifactory.post35>delta-spark_${scala.binary.version}</delta.core.artifactory.post35>
<!-- environment properties -->
<java.version>1.8</java.version>
<platform-encoding>UTF-8</platform-encoding>
Expand Down Expand Up @@ -571,7 +575,7 @@
<dependency>
<!-- add delta-lake to test against delta-lake write optimizations -->
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
<artifactId>${delta.core.artifactory}</artifactId>
<version>${delta.core.version}</version>
<scope>test</scope>
</dependency>
Expand Down Expand Up @@ -950,10 +954,5 @@
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<!-- added to support preview releases from iodelta -->
<id>iodelta-staging-repo</id>
<url>https://oss.sonatype.org/content/repositories/iodelta-1080/</url>
</repository>
</repositories>
</project>
296 changes: 296 additions & 0 deletions core/src/main/resources/operatorsScore-databricks-aws-a10G.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
CPUOperator,Score
CoalesceExec,2.45
CollectLimitExec,2.45
ExpandExec,2.45
FileSourceScanExec,2.45
FilterExec,3.75
GenerateExec,2.45
GlobalLimitExec,2.45
LocalLimitExec,2.45
ProjectExec,2.45
RangeExec,2.45
SampleExec,2.45
SortExec,6.11
SubqueryBroadcastExec,2.45
TakeOrderedAndProjectExec,2.45
UnionExec,2.45
CustomShuffleReaderExec,2.45
HashAggregateExec,3.23
ObjectHashAggregateExec,2.45
SortAggregateExec,2.45
InMemoryTableScanExec,2.45
DataWritingCommandExec,2.45
AtomicReplaceTableAsSelectExec,2.45
AtomicCreateTableAsSelectExec,2.45
OverwriteByExpressionExecV1,2.45
AppendDataExecV1,2.45
ExecutedCommandExec,2.45
BatchScanExec,2.45
BroadcastExchangeExec,2.45
ShuffleExchangeExec,2.78
BroadcastHashJoinExec,2.82
BroadcastNestedLoopJoinExec,2.45
CartesianProductExec,2.45
ShuffledHashJoinExec,2.45
SortMergeJoinExec,20.57
FlatMapCoGroupsInPandasExec,2.45
WindowExec,2.45
WindowGroupLimitExec,2.45
HiveTableScanExec,2.45
Abs,2.45
Acos,2.45
Acosh,2.45
Add,2.45
AggregateExpression,2.45
Alias,2.45
And,2.45
ApproximatePercentile,2.45
ArrayContains,2.45
ArrayExcept,2.45
ArrayExists,2.45
ArrayIntersect,2.45
ArrayMax,2.45
ArrayMin,2.45
ArrayRemove,2.45
ArrayRepeat,2.45
ArrayTransform,2.45
ArrayUnion,2.45
ArraysOverlap,2.45
ArraysZip,2.45
Asin,2.45
Asinh,2.45
AtLeastNNonNulls,2.45
Atan,2.45
Atanh,2.45
AttributeReference,2.45
Average,2.45
BRound,2.45
BitLength,2.45
BitwiseAnd,2.45
BitwiseNot,2.45
BitwiseOr,2.45
BitwiseXor,2.45
CaseWhen,2.45
Cbrt,2.45
Ceil,2.45
CheckOverflow,2.45
Coalesce,2.45
CollectList,2.45
CollectSet,2.45
Concat,2.45
ConcatWs,2.45
Contains,2.45
Conv,2.45
Cos,2.45
Cosh,2.45
Cot,2.45
Count,2.45
CreateArray,2.45
CreateMap,2.45
CreateNamedStruct,2.45
CurrentRow$,2.45
DateAdd,2.45
DateAddInterval,2.45
DateDiff,2.45
DateFormatClass,2.45
DateSub,2.45
DayOfMonth,2.45
DayOfWeek,2.45
DayOfYear,2.45
DenseRank,2.45
Divide,2.45
DynamicPruningExpression,2.45
ElementAt,2.45
EndsWith,2.45
EqualNullSafe,2.45
EqualTo,2.45
Exp,2.45
Explode,2.45
Expm1,2.45
First,2.45
Flatten,2.45
Floor,2.45
FormatNumber,2.45
FromUTCTimestamp,2.45
FromUnixTime,2.45
GetArrayItem,2.45
GetArrayStructFields,2.45
GetJsonObject,2.45
GetMapValue,2.45
GetStructField,2.45
GetTimestamp,2.45
GreaterThan,2.45
GreaterThanOrEqual,2.45
Greatest,2.45
HiveGenericUDF,2.45
HiveSimpleUDF,2.45
Hour,2.45
Hypot,2.45
If,2.45
In,2.45
InSet,2.45
InitCap,2.45
InputFileBlockLength,2.45
InputFileBlockStart,2.45
InputFileName,2.45
IntegralDivide,2.45
IsNaN,2.45
IsNotNull,2.45
IsNull,2.45
JsonToStructs,2.45
JsonTuple,2.45
KnownFloatingPointNormalized,2.45
KnownNotNull,2.45
Lag,2.45
LambdaFunction,2.45
Last,2.45
LastDay,2.45
Lead,2.45
Least,2.45
Length,2.45
LessThan,2.45
LessThanOrEqual,2.45
Like,2.45
Literal,2.45
Log,2.45
Log10,2.45
Log1p,2.45
Log2,2.45
Logarithm,2.45
Lower,2.45
MakeDecimal,2.45
MapConcat,2.45
MapEntries,2.45
MapFilter,2.45
MapKeys,2.45
MapValues,2.45
Max,2.45
Md5,2.45
MicrosToTimestamp,2.45
MillisToTimestamp,2.45
Min,2.45
Minute,2.45
MonotonicallyIncreasingID,2.45
Month,2.45
Multiply,2.45
Murmur3Hash,2.45
NaNvl,2.45
NamedLambdaVariable,2.45
NormalizeNaNAndZero,2.45
Not,2.45
NthValue,2.45
OctetLength,2.45
Or,2.45
ParseUrl,2.45
Percentile,2.45
PercentRank,2.45
PivotFirst,2.45
Pmod,2.45
PosExplode,2.45
Pow,2.45
PreciseTimestampConversion,2.45
PromotePrecision,2.45
PythonUDF,2.45
Quarter,2.45
RLike,2.45
RaiseError,2.45
Rand,2.45
Rank,2.45
RegExpExtract,2.45
RegExpExtractAll,2.45
RegExpReplace,2.45
Remainder,2.45
ReplicateRows,2.45
Reverse,2.45
Rint,2.45
Round,2.45
RowNumber,2.45
ScalaUDF,2.45
ScalarSubquery,2.45
Second,2.45
SecondsToTimestamp,2.45
Sequence,2.45
ShiftLeft,2.45
ShiftRight,2.45
ShiftRightUnsigned,2.45
Signum,2.45
Sin,2.45
Sinh,2.45
Size,2.45
SortArray,2.45
SortOrder,2.45
SparkPartitionID,2.45
SpecifiedWindowFrame,2.45
Sqrt,2.45
Stack,2.45
StartsWith,2.45
StddevPop,2.45
StddevSamp,2.45
StringInstr,2.45
StringLPad,2.45
StringLocate,2.45
StringRPad,2.45
StringRepeat,2.45
StringReplace,2.45
StringSplit,2.45
StringToMap,2.45
StringTranslate,2.45
StringTrim,2.45
StringTrimLeft,2.45
StringTrimRight,2.45
StructsToJson,2.45
Substring,2.45
SubstringIndex,2.45
Subtract,2.45
Sum,2.45
Tan,2.45
Tanh,2.45
TimeAdd,2.45
ToDegrees,2.45
ToRadians,2.45
ToUnixTimestamp,2.45
TransformKeys,2.45
TransformValues,2.45
UnaryMinus,2.45
UnaryPositive,2.45
UnboundedFollowing$,2.45
UnboundedPreceding$,2.45
UnixTimestamp,2.45
UnscaledValue,2.45
Upper,2.45
VariancePop,2.45
VarianceSamp,2.45
WeekDay,2.45
WindowExpression,2.45
WindowSpecDefinition,2.45
XxHash64,2.45
Year,2.45
Empty2Null,2.45
WriteFilesExec,2.45
Ascii,2.45
ToUTCTimestamp,2.45
AggregateInPandasExec,1.2
ArrowEvalPythonExec,1.2
FlatMapGroupsInPandasExec,1.2
MapInPandasExec,1.2
PythonMapInArrowExec,2.45
MapInArrowExec,2.45
WindowInPandasExec,1.2
KMeans-pyspark,8.86
KMeans-scala,1.0
PCA-pyspark,2.24
PCA-scala,2.69
LinearRegression-pyspark,2.0
LinearRegression-scala,1.0
RandomForestClassifier-pyspark,6.31
RandomForestClassifier-scala,1.0
RandomForestRegressor-pyspark,3.66
RandomForestRegressor-scala,1.0
XGBoost-pyspark,1.0
XGBoost-scala,3.31
RoundCeil,2.45
RoundFloor,2.45
BloomFilterMightContain,2.45
BloomFilterAggregate,2.45
EphemeralSubstring,2.45
KnownNullable,2.45
Loading

0 comments on commit 99c1651

Please sign in to comment.