Skip to content

Commit

Permalink
TPC-DS: MonetDB
Browse files Browse the repository at this point in the history
  • Loading branch information
perdelt committed Oct 9, 2024
1 parent 0f50a94 commit 8cdb473
Show file tree
Hide file tree
Showing 7 changed files with 245 additions and 10 deletions.
12 changes: 6 additions & 6 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ docker push bexhoma/loader_tpcds_postgresql:latest &
cd ..

#cd loader_mysql
#docker build -f Dockerfile -t bexhoma/loader_tpch_mysql:latest .
#docker push bexhoma/loader_tpch_mysql:latest &
#docker build -f Dockerfile -t bexhoma/loader_tpcds_mysql:latest .
#docker push bexhoma/loader_tpcds_mysql:latest &
#cd ..

#cd loader_monetdb
#docker build -f Dockerfile -t bexhoma/loader_tpch_monetdb:latest .
#docker push bexhoma/loader_tpch_monetdb:latest &
#cd ..
# Build the TPC-DS loader image for MonetDB from ./loader_monetdb/Dockerfile.
# The build runs in the foreground; the push is started in the background (&).
cd loader_monetdb
docker build -f Dockerfile -t bexhoma/loader_tpcds_monetdb:latest .
docker push bexhoma/loader_tpcds_monetdb:latest &
cd ..
cd ..


Expand Down
32 changes: 32 additions & 0 deletions images/tpcds/loader_monetdb/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Loader image: runs /tmp/loader.sh, which imports TPC-DS data into MonetDB.
# NOTE(review): the MonetDB base image is presumably used for its bundled
# mclient (the commented-out lines show the former CentOS + MonetDB-client
# variant) - confirm.
FROM monetdb/monetdb:Sep2022
#FROM centos:centos7

#RUN yum install -y https://dev.monetdb.org/downloads/epel/MonetDB-release-epel.noarch.rpm
#RUN yum install -y MonetDB-client

# System update and build tools in a single layer; the yum cache is removed
# at the end so it does not stay in the image.
RUN yum -y update \
 && yum install -y gcc wget \
 && yum clean all

# Build redis-cli from source (used by loader.sh to synchronise loader pods).
# - download via HTTPS instead of plain HTTP
# - no sudo: the yum steps above already run without it, i.e. the build user
#   is root
# - tarball and source tree are removed after copying the binary
RUN wget https://download.redis.io/redis-stable.tar.gz \
 && tar xzf redis-stable.tar.gz \
 && make -C redis-stable \
 && cp redis-stable/src/redis-cli /usr/local/bin/ \
 && chmod 755 /usr/local/bin/redis-cli \
 && rm -rf redis-stable redis-stable.tar.gz

# Default values for the parameters read by loader.sh.
# All use the key=value form (the space-separated "ENV key value" form is
# legacy syntax).
ENV NUM_PODS=4
ENV CHILD=1
ENV BEXHOMA_HOST="www.example.com"
ENV BEXHOMA_PORT=50000
ENV BEXHOMA_CONNECTION="monetdb"
ENV BEXHOMA_EXPERIMENT="12345"
ENV DATABASE=demo
ENV STORE_RAW_DATA=0
ENV BEXHOMA_SYNCH_LOAD=0

WORKDIR /tmp

RUN mkdir -p /tmp/tpcds

#COPY ./*.dat /tmp/

COPY ./loader.sh /tmp/loader.sh
RUN ["chmod", "+x", "/tmp/loader.sh"]


CMD ["/bin/bash", "-c", "/tmp/loader.sh"]
16 changes: 16 additions & 0 deletions images/tpcds/loader_monetdb/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Loader for TPC-DS data into MonetDB

The following parameters (ENV) have been added:

* `NUM_PODS`:
* `CHILD`:
* `BEXHOMA_HOST`:
* `BEXHOMA_PORT`:
* `BEXHOMA_CONNECTION`:
* `BEXHOMA_EXPERIMENT`:
* `DATABASE`:
* `STORE_RAW_DATA`:
* `BEXHOMA_SYNCH_LOAD`:
* `BEXHOMA_USER`:

This folder contains the Dockerfile for a loader that loads data into MonetDB by redirecting each data file into `mclient` (`mclient ... < file`).
164 changes: 164 additions & 0 deletions images/tpcds/loader_monetdb/loader.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#!/bin/bash

######################## Start timing ########################
# Remember wall-clock time and the value of bash's SECONDS counter at script
# start; the counter is used for the total script duration at the end.
DATEANDTIME=$(date '+%d.%m.%Y %H:%M:%S')
printf 'NOW: %s\n' "$DATEANDTIME"
SECONDS_START_SCRIPT=$SECONDS

######################## Show general parameters ########################
# Print each parameter as NAME:value (value looked up by variable name).
for param in BEXHOMA_CONNECTION BEXHOMA_EXPERIMENT_RUN BEXHOMA_CONFIGURATION BEXHOMA_CLIENT; do
  printf '%s:%s\n' "$param" "${!param}"
done

######################## Show more parameters ########################
# The number of this pod is read from a file, replacing any CHILD value
# coming from the environment.
CHILD=$(cat /tmp/tpcds/CHILD )
printf 'CHILD %s\n' "$CHILD"
printf 'NUM_PODS %s\n' "$NUM_PODS"
printf 'SF %s\n' "$SF"

######################## Destination of raw data ########################
# Determine the folder that holds the generated *.dat files:
#  - STORE_RAW_DATA > 0: (distributed) file system below /data, with a
#    sub folder per pod when more than one pod is loading
#  - otherwise: local folder below /tmp
# Unset values are treated like 0 (STORE_RAW_DATA) resp. 1 (NUM_PODS), which
# selects the same branches as before but without "unary operator expected"
# errors from test.
if [ "${STORE_RAW_DATA:-0}" -gt 0 ]; then
  # store in (distributed) file system
  if [ "${NUM_PODS:-1}" -gt 1 ]; then
    destination_raw=/data/tpcds/SF$SF/$NUM_PODS/$CHILD/
  else
    destination_raw=/data/tpcds/SF$SF/
  fi
else
  # only store locally
  destination_raw=/tmp/tpcds/SF$SF/$NUM_PODS/$CHILD
fi
echo "destination_raw $destination_raw"
# Everything below works relative to this folder (credentials file, *.dat
# glob). Without it there is nothing to load, so stop instead of silently
# continuing in the wrong directory.
if ! cd "$destination_raw"; then
  echo "ERROR: cannot change to data folder $destination_raw" >&2
  exit 1
fi

######################## Show generated files ########################
echo "Found these files:"
ls -lh "$destination_raw"/*.dat

######################## Add login parameters for MonetDB ########################
#cd /tmp/tpcds/
# Credentials file in the current folder, so that the later mclient calls
# need no user/password arguments.
printf 'user=monetdb\npassword=monetdb\n' > .monetdb

######################## Wait until all pods of job are ready ########################
# Optional start barrier (BEXHOMA_SYNCH_LOAD > 0): every loader pod increments
# a shared counter in Redis (host bexhoma-messagequeue) and then polls once
# per second until the counter equals NUM_PODS.
# An unset BEXHOMA_SYNCH_LOAD counts as 0 (no synchronisation).
if [ "${BEXHOMA_SYNCH_LOAD:-0}" -gt 0 ]; then
  counter_key="bexhoma-loader-podcount-$BEXHOMA_CONNECTION-$BEXHOMA_EXPERIMENT"
  echo "Querying counter $counter_key"
  # add this pod to counter
  redis-cli -h 'bexhoma-messagequeue' incr "$counter_key"
  # wait for number of pods to be as expected
  while : ; do
    PODS_RUNNING="$(redis-cli -h 'bexhoma-messagequeue' get "$counter_key")"
    echo "Found $PODS_RUNNING / $NUM_PODS running pods"
    if ! [[ "$PODS_RUNNING" =~ ^[0-9]+$ && "$NUM_PODS" =~ ^[0-9]+$ ]]; then
      # No usable number (e.g. empty reply because Redis is not reachable):
      # keep polling, but do not feed the value into a numeric comparison.
      echo "We have to wait"
      sleep 1
    elif [ "$PODS_RUNNING" -eq "$NUM_PODS" ]; then
      echo "OK"
      break
    elif [ "$PODS_RUNNING" -gt "$NUM_PODS" ]; then
      # More registrations than pods: this pod does not load anything and
      # ends with exit code 0.
      echo "Too many pods! Restart occured?"
      exit 0
    else
      echo "We have to wait"
      sleep 1
    fi
  done
fi

######################## Start measurement of time ########################
# Start of the loading phase: epoch seconds (UTC) and bash SECONDS counter.
# SECONDS_START must stay untouched until the duration of the whole loading
# phase is computed after the loop - per-file timing uses its own variables.
bexhoma_start_epoch=$(date -u +%s)
SECONDS_START=$SECONDS
echo "Start $SECONDS_START seconds"

######################## Execute loading ###################
# Import every *.dat file of the current folder into the table named after
# the file, by sending a COPY ... FROM STDIN statement to mclient and
# redirecting the file into it.
# shuffled
#for i in `ls *.dat | shuf`; do
# ordered
for i in *.dat; do
  # without any data file the glob stays literal - nothing to import
  [[ -e "$i" ]] || continue
  # table name = file name without the "_<CHILD>_<NUM_PODS>..." suffix
  # (several pods) resp. without ".dat" (single pod)
  if [ "${NUM_PODS:-1}" -gt 1 ]; then
    basename=${i%_"$CHILD"_"$NUM_PODS"*}
  else
    basename=${i%.dat*}
  fi
  # number of records = number of lines in the file
  lines=$(wc -l < "$i")
  # skip table if limit to other table is set
  if [ -z "${TPCDS_TABLE}" ]; then
    echo "table limit not set"
  elif [ "${TPCDS_TABLE}" == "$basename" ]; then
    echo "limit import to this table $TPCDS_TABLE"
  else
    echo "skipping $basename, import is limited to other table ($TPCDS_TABLE)"
    continue
  fi
  COMMAND="COPY $lines RECORDS INTO $basename FROM STDIN USING DELIMITERS '|','\\n','\"' NULL AS ''"
  echo "============================"
  echo "$COMMAND"

  #FAILED=0 # everything ok
  #FAILED=1 # known error   -> import is repeated
  #FAILED=2 # unknown error -> import is NOT repeated
  FAILED=1
  while [ "$FAILED" == 1 ]; do
    FAILED=2
    file_start=$SECONDS
    echo "=========="
    # output of mclient (stdout and stderr) is collected for inspection below
    time mclient --host "$BEXHOMA_HOST" --database "$DATABASE" --port "$BEXHOMA_PORT" -E UTF-8 -s "$COMMAND" - < "$i" &> /tmp/OUTPUT.txt
    echo "Start $file_start seconds"
    file_end=$SECONDS
    echo "End $file_end seconds"
    file_duration=$((file_end-file_start))
    echo "Duration $file_duration seconds"
    OUTPUT=$(cat /tmp/OUTPUT.txt )
    echo "$OUTPUT"
    # everything worked well ("row" and "rows" string checked)
    if [[ $OUTPUT == *"$lines affected row"* ]]; then echo "Import ok"; FAILED=0; fi
    # rollback, we have to do it again (?)
    if [[ $OUTPUT == *"ROLLBACK"* ]]; then echo "ROLLBACK occured"; FAILED=1; fi
    # no thread left, we have to do it again (?)
    if [[ $OUTPUT == *"failed to start worker thread"* ]]; then echo "No worker thread"; FAILED=1; fi
    if [[ $OUTPUT == *"failed to start producer thread"* ]]; then echo "No producer thread"; FAILED=1; fi
    if [[ $OUTPUT == *"Challenge string is not valid, it is empty"* ]]; then echo "No Login possible"; FAILED=1; fi
    # something else - what? (status is still the initial "unknown error")
    if [[ $FAILED == 2 ]]; then echo "Something unexpected happend"; fi
    echo "FAILED = $FAILED at $basename"
    # only known errors are retried, so only then announce and wait
    if [[ $FAILED == 1 ]]; then echo "Wait 1s before retrying"; sleep 1; fi
  done
done

######################## End measurement of time ########################
# End of the loading phase: epoch seconds (UTC) and bash SECONDS counter.
bexhoma_end_epoch=$(date -u +%s)
SECONDS_END=$SECONDS
printf 'End %s seconds\n' "$SECONDS_END"

# Duration relative to SECONDS_START.
DURATION=$(( SECONDS_END - SECONDS_START ))
printf 'Duration %s seconds\n' "$DURATION"

######################## Show timing information ###################
printf '%s\n' "Loading done"

DATEANDTIME=$(date '+%d.%m.%Y %H:%M:%S')
printf 'NOW: %s\n' "$DATEANDTIME"

# Total run time of the script, followed by the KEY:value summary lines.
SECONDS_END_SCRIPT=$SECONDS
DURATION_SCRIPT=$(( SECONDS_END_SCRIPT - SECONDS_START_SCRIPT ))
printf '%s\n' \
  "Duration $DURATION_SCRIPT seconds (script total)" \
  "BEXHOMA_DURATION:$DURATION_SCRIPT" \
  "BEXHOMA_START:$bexhoma_start_epoch" \
  "BEXHOMA_END:$bexhoma_end_epoch"

######################## Exit successfully ###################
# Debugging aid (disabled): endless loop that would keep the script, and
# with it the container, running instead of exiting.
# while true; do sleep 2; done
# The script always ends with exit code 0 when it reaches this point.
exit 0
27 changes: 25 additions & 2 deletions test-more.sh
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ wait_process "ycsb"


###########################################
################## TPC-DS ##################
################## TPC-DS #################
###########################################


Expand All @@ -782,7 +782,30 @@ nohup python tpcds.py -ms 1 -tr \


#### Wait so that next experiment receives a different code
sleep 600
#sleep 600
wait_process "tpcds"



### TPC-DS Power Test - only MonetDB (TestCases.md)
# Starts tpcds.py in the background, restricted to MonetDB (-dbms MonetDB).
# stdin is detached (</dev/null) and stdout/stderr are written to
# $LOG_DIR/test_tpcds_testcase_monetdb_1.log.
nohup python tpcds.py -ms 1 -tr \
-sf 1 \
-dt \
-t 1200 \
-dbms MonetDB \
-rnn $BEXHOMA_NODE_SUT -rnl $BEXHOMA_NODE_LOAD -rnb $BEXHOMA_NODE_BENCHMARK \
-ii -ic -is \
-nlp 8 \
-nbp 1 \
-ne 1 \
-nc 1 \
run </dev/null &>$LOG_DIR/test_tpcds_testcase_monetdb_1.log &

# Manual helper (disabled): follow the log of this experiment
#watch -n 30 tail -n 50 $LOG_DIR/test_tpcds_testcase_monetdb_1.log


#### Wait so that next experiment receives a different code
# The fixed sleep is disabled in favour of wait_process.
# NOTE(review): wait_process is defined elsewhere in this file; presumably it
# blocks until the "tpcds" run has finished - confirm.
#sleep 600
wait_process "tpcds"


Expand Down
2 changes: 1 addition & 1 deletion tpcds.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@
)
config.set_loading(parallel=split_portion, num_pods=loading_pods_total)
if ("MariaDB" in args.dbms or len(args.dbms) == 0):
# MonetDB
# MariaDB
name_format = 'MariaDB-{cluster}-{pods}'
config = configurations.default(experiment=experiment, docker='MariaDB', configuration=name_format.format(cluster=cluster_name, pods=loading_pods_total, split=split_portion), dialect='MySQL', alias='DBMS A1')
config.set_storage(
Expand Down
2 changes: 1 addition & 1 deletion tpch.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@
)
config.set_loading(parallel=split_portion, num_pods=loading_pods_total)
if ("MariaDB" in args.dbms or len(args.dbms) == 0):
# MonetDB
# MariaDB
name_format = 'MariaDB-{cluster}-{pods}'
config = configurations.default(experiment=experiment, docker='MariaDB', configuration=name_format.format(cluster=cluster_name, pods=loading_pods_total, split=split_portion), dialect='MySQL', alias='DBMS A1')
config.set_storage(
Expand Down

0 comments on commit 8cdb473

Please sign in to comment.