-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathsetup_spark_remote.sh
executable file
·61 lines (50 loc) · 1.77 KB
/
setup_spark_remote.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env bash
#
# Set up a local Spark Connect server.
#
# Looks up the newest non-preview Spark release listed on dlcdn.apache.org,
# downloads and unpacks it under "$HOME/spark" (skipped when the release's
# start-connect-server.sh is already present), then runs that start script.
#
# Requires: wget, tar, nc; sed/sort are used with `\n` in a replacement and
#           `--version-sort` (GNU behaviour).
# Output:   stdout of the start script is written to "$HOME/spark/log.out".
# Exit:     0 when the start script returned 0, or when it returned non-zero
#           but the log reports "SparkConnectServer running as process" and
#           localhost:4040 accepts connections; non-zero otherwise.
set -xve
echo "Setting up spark-connect"
mkdir -p "$HOME"/spark
cd "$HOME"/spark || exit 1

# Scrape the directory listing: keep the anchor text of every "spark-*" entry,
# strip the "spark-" prefix and the trailing slash, and take the highest
# version. The prefix is removed with an anchored sed expression; `tr -d`
# would delete a *set of characters*, not the word.
version=$(wget -O - https://dlcdn.apache.org/spark/ \
  | grep 'href="spark' \
  | grep -v 'preview' \
  | sed 's:</a>:\n:g' \
  | sed -n 's/.*>//p' \
  | sed 's/^spark-//' \
  | tr -d / \
  | sort -r --version-sort \
  | head -1)
if [ -z "$version" ]; then
  echo "Failed to extract Spark version"
  exit 1
fi

spark=spark-${version}-bin-hadoop3

# The spark-connect artifact name carries the Scala binary version. Spark 4.x
# is published for Scala 2.13 only, earlier default builds use Scala 2.12.
major=${version%%.*}
if [[ "${major}" =~ ^[0-9]+$ ]] && ((major >= 4)); then
  spark_connect="spark-connect_2.13"
else
  spark_connect="spark-connect_2.12"
fi

mkdir -p "${spark}"
SERVER_SCRIPT=$HOME/spark/${spark}/sbin/start-connect-server.sh

## Check whether this Spark version is already unpacked; if not, download
## (unless the archive is already on disk) and extract it.
if [ -f "${SERVER_SCRIPT}" ]; then
  echo "Spark Version already exists"
else
  if [ -f "${spark}.tgz" ]; then
    echo "${spark}.tgz already exists"
  else
    # Download under a temporary name and rename on success, so that an
    # interrupted transfer is never mistaken for a complete archive later.
    wget -O "${spark}.tgz.part" \
      "https://dlcdn.apache.org/spark/spark-${version}/${spark}.tgz"
    mv -- "${spark}.tgz.part" "${spark}.tgz"
  fi
  tar -xvf "${spark}.tgz"
fi
cd "${spark}" || exit 1

## Start the Spark Connect server, capturing its exit status without letting
## `set -e` abort the script on a non-zero return.
result=0
"${SERVER_SCRIPT}" --packages "org.apache.spark:${spark_connect}:${version}" \
  > "$HOME"/spark/log.out || result=$?

# NOTE(review): the readiness wait below only runs when the start script
# returned non-zero; a successful fresh start is reported immediately without
# probing the port. Confirm whether that is intended.
if [ "$result" -ne 0 ]; then
  # A non-zero status is tolerated only when the log says a server process is
  # running (presumably the "already running" message — confirm against the
  # Spark daemon script). `grep -q` is tested directly: `grep -c` exits 1 on
  # zero matches, which under `set -e` would kill the script before the
  # message below could be printed.
  if ! tail "${HOME}"/spark/log.out \
    | grep -q "SparkConnectServer running as process"; then
    echo "Failed to start the server"
    exit 1
  fi
  # Wait for the server to start by pinging localhost:4040
  # (up to 30 attempts, 5 seconds apart).
  # NOTE(review): 4040 is presumably the Spark UI port, not the Connect
  # endpoint itself — confirm this is the intended readiness signal.
  echo "Waiting for the server to start..."
  for _ in {1..30}; do
    if nc -z localhost 4040; then
      echo "Server is up and running"
      break
    fi
    echo "Server not yet available, retrying in 5 seconds..."
    sleep 5
  done
  # Final probe: distinguishes "loop broke because the port answered" from
  # "all attempts were exhausted".
  if ! nc -z localhost 4040; then
    echo "Failed to start the server within the expected time"
    exit 1
  fi
fi
echo "Started the Server"