diff --git a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb
index a7f9780d..93c71736 100644
--- a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb
+++ b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb
@@ -52,7 +52,9 @@
     "        total_time += round(end - start, 2)\n",
     "        count = count + 1\n",
     "        print(\"Retry times : {}, \".format(count) + appName + \" microbenchmark takes {} seconds\".format(round(end - start, 2)))\n",
-    "    print(appName + \" microbenchmark takes average {} seconds after {} retries\".format(round(total_time/retryTimes),retryTimes))"
+    "    print(appName + \" microbenchmark takes average {} seconds after {} retries\".format(round(total_time/retryTimes),retryTimes))\n",
+    "    with open('result.txt', 'a') as file:\n",
+    "        file.write(\"{},{},{}\\n\".format(appName, round(total_time/retryTimes), retryTimes))"
    ]
   },
   {
@@ -111,12 +113,12 @@
     "# Load dataframe and create tempView\n",
     "# You need to update data path to your real path!\n",
     "dataRoot = os.getenv(\"DATA_ROOT\", \"/data\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/customer\").createOrReplaceTempView(\"customer\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/store_sales\").createOrReplaceTempView(\"store_sales\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/catalog_sales\").createOrReplaceTempView(\"catalog_sales\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/web_sales\").createOrReplaceTempView(\"web_sales\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/item\").createOrReplaceTempView(\"item\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/date_dim\").createOrReplaceTempView(\"date_dim\")\n",
+    "spark.read.parquet(dataRoot + \"/customer.dat\").createOrReplaceTempView(\"customer\")\n",
+    "spark.read.parquet(dataRoot + \"/store_sales.dat\").createOrReplaceTempView(\"store_sales\")\n",
+    "spark.read.parquet(dataRoot + \"/catalog_sales.dat\").createOrReplaceTempView(\"catalog_sales\")\n",
+    "spark.read.parquet(dataRoot + \"/web_sales.dat\").createOrReplaceTempView(\"web_sales\")\n",
+    "spark.read.parquet(dataRoot + \"/item.dat\").createOrReplaceTempView(\"item\")\n",
+    "spark.read.parquet(dataRoot + \"/date_dim.dat\").createOrReplaceTempView(\"date_dim\")\n",
     "print(\"-\"*50)"
    ]
   },
@@ -497,7 +499,7 @@
    ],
    "source": [
     "start = time() \n",
-    "spark.read.parquet(dataRoot + \"/tpcds/customer\").limit(1000000).write.format(\"parquet\").mode(\"overwrite\").save(\"/data/tmp/customer1m\")\n",
+    "spark.read.parquet(dataRoot + \"/customer.dat\").limit(1000000).write.format(\"parquet\").mode(\"overwrite\").save(\"/data/tmp/customer1m\")\n",
     "end = time()\n",
     "# Parquet file scanning and writing will be about 3 times faster running on GPU\n",
     "print(\"scanning and writing parquet cost : {} seconds\".format(round(end - start, 2)))\n",
@@ -557,8 +559,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "spark.read.parquet(dataRoot + \"/tpcds/store_sales\").createOrReplaceTempView(\"store_sales\")\n",
-    "spark.read.parquet(dataRoot + \"/tpcds/store_returns\").createOrReplaceTempView(\"store_returns\")\n",
+    "spark.read.parquet(dataRoot + \"/store_sales.dat\").createOrReplaceTempView(\"store_sales\")\n",
+    "spark.read.parquet(dataRoot + \"/store_returns.dat\").createOrReplaceTempView(\"store_returns\")\n",
     "\n",
     "print(\"-\"*50)\n",
     "query = '''\n",
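
For context, the sketch below shows how the patched `runMicroBenchmark` helper reads once the first hunk is applied. Only the tail of the function appears in the diff; the timing loop body (the `spark.sql(query).show(5)` call and the `start`/`end` timestamps) is an assumed reconstruction inferred from the variables used in the visible lines, not something this patch touches.

```python
from time import time

def runMicroBenchmark(spark, appName, query, retryTimes):
    """Run `query` retryTimes times, print per-run and average timings,
    and append the averaged result to result.txt as a CSV row.

    NOTE: only the final print and the with-block are visible in the
    patch; the loop body is an assumed reconstruction for context.
    """
    count = 0
    total_time = 0
    while count < retryTimes:
        start = time()
        spark.sql(query).show(5)  # assumed: run the query and materialize some rows
        end = time()
        total_time += round(end - start, 2)
        count = count + 1
        print("Retry times : {}, ".format(count) + appName + " microbenchmark takes {} seconds".format(round(end - start, 2)))
    print(appName + " microbenchmark takes average {} seconds after {} retries".format(round(total_time/retryTimes), retryTimes))
    # New in this patch: persist "appName,average_seconds,retries" so
    # results accumulate across notebook runs. round() with no digits
    # argument stores the average as a whole number of seconds.
    with open('result.txt', 'a') as file:
        file.write("{},{},{}\n".format(appName, round(total_time/retryTimes), retryTimes))
```

Because the file is opened in append mode, repeated runs add one row per benchmark; the rows can later be read back with, e.g., `pandas.read_csv('result.txt', names=['app', 'avg_seconds', 'retries'])` (a hypothetical usage, not part of the patch).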