Skip to content

Commit

Permalink
TPC-DS: Prepare tests for MySQL and MariaDB
Browse files Browse the repository at this point in the history
  • Loading branch information
perdelt committed Oct 21, 2024
1 parent a225164 commit 7f0079b
Show file tree
Hide file tree
Showing 12 changed files with 1,141 additions and 4 deletions.
13 changes: 9 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,15 @@ docker build -f Dockerfile -t bexhoma/loader_tpcds_postgresql:latest .
docker push bexhoma/loader_tpcds_postgresql:latest &
cd ..

#cd loader_mysql
#docker build -f Dockerfile -t bexhoma/loader_tpcds_mysql:latest .
#docker push bexhoma/loader_tpcds_mysql:latest &
#cd ..
# Build and push the TPC-DS loader images for MySQL and MariaDB.
# Each build runs in a subshell: if the directory is missing, `docker build`
# is skipped instead of running against the wrong build context, and the
# script's working directory is never changed (so no `cd ..` is needed).
# The push is started in the background, as for the other images.
( cd loader_mysql && docker build -f Dockerfile -t bexhoma/loader_tpcds_mysql:latest . )
docker push bexhoma/loader_tpcds_mysql:latest &

( cd loader_mariadb && docker build -f Dockerfile -t bexhoma/loader_tpcds_mariadb:latest . )
docker push bexhoma/loader_tpcds_mariadb:latest &

cd loader_monetdb
docker build -f Dockerfile -t bexhoma/loader_tpcds_monetdb:latest .
Expand Down
33 changes: 33 additions & 0 deletions images/tpcds/loader_mariadb/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Loader image for TPC-DS data into MariaDB.
# Contains redis-cli (built from source), the MariaDB command line client
# and /tmp/loader.sh, which is run when the container starts.
# NOTE(review): `stable` is a moving suite name - confirm that the apt sources
# of this dated base image still resolve to the intended Debian release.
FROM debian:stable-20221004-slim

# Install all OS packages and build redis-cli in ONE layer:
# - `apt-get update` and `apt-get install` share a layer, so a cached, stale
#   package index can never be combined with a newer install step;
# - the apt lists, the redis tarball and the build tree are removed in the
#   same layer that created them, so they do not end up in the image.
# ca-certificates is needed because the tarball is fetched via HTTPS.
# NOTE(review): redis-stable.tar.gz is a moving target; pin a release and
# verify its checksum if reproducible builds are required.
RUN apt-get update \
 && apt-get install -y \
        build-essential \
        ca-certificates \
        mariadb-client \
        wget \
 && wget -q -O /tmp/redis-stable.tar.gz https://download.redis.io/redis-stable.tar.gz \
 && tar -xzf /tmp/redis-stable.tar.gz -C /tmp \
 && make -C /tmp/redis-stable \
 && install -m 755 /tmp/redis-stable/src/redis-cli /usr/local/bin/redis-cli \
 && rm -rf /tmp/redis-stable /tmp/redis-stable.tar.gz /var/lib/apt/lists/*

# Default parameters of the loader; all of them can be overridden at runtime.
# NOTE(review): BEXHOMA_CONNECTION defaults to "monetdb" although this is the
# MariaDB loader - value kept unchanged, confirm it is intended.
ENV NUM_PODS=4 \
    CHILD=1 \
    BEXHOMA_HOST="www.example.com" \
    BEXHOMA_PORT=50000 \
    BEXHOMA_CONNECTION="monetdb" \
    BEXHOMA_EXPERIMENT="12345" \
    DATABASE=tpcds \
    STORE_RAW_DATA=0 \
    BEXHOMA_SYNCH_LOAD=0 \
    MYSQL_LOADING_FROM="LOCAL"

WORKDIR /tmp

RUN mkdir -p /tmp/tpcds

#COPY ./*.dat /tmp/

COPY ./loader.sh /tmp/loader.sh
RUN ["chmod", "+x", "/tmp/loader.sh"]


CMD ["/bin/bash", "-c", "/tmp/loader.sh"]
# For debugging, keep the container alive instead of running the loader:
#CMD ["/bin/bash", "-c", "while true; do sleep 2; done"]
16 changes: 16 additions & 0 deletions images/tpcds/loader_mariadb/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Loader for TPC-DS data into MariaDB

The following parameters (ENV) have been added:

* `NUM_PODS`:
* `CHILD`:
* `BEXHOMA_HOST`:
* `BEXHOMA_PORT`:
* `BEXHOMA_CONNECTION`:
* `BEXHOMA_EXPERIMENT`:
* `DATABASE`:
* `STORE_RAW_DATA`:
* `BEXHOMA_SYNCH_LOAD`:
* `BEXHOMA_USER`:

This folder contains the Dockerfile for a loader that loads data into MariaDB via `mysql LOAD DATA LOCAL INFILE`.
286 changes: 286 additions & 0 deletions images/tpcds/loader_mariadb/loader.sh

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions images/tpcds/loader_mysql/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Loader image for TPC-DS data into MySQL.
# Contains redis-cli (built from source), MySQL Shell (mysqlsh) and
# /tmp/loader.sh, which is run when the container starts.
# NOTE(review): `stable` is a moving suite name - confirm that the apt sources
# of this dated base image still match the debian12 MySQL Shell package below.
FROM debian:stable-20240110-slim

# Install OS packages, build redis-cli and generate the en_US.UTF-8 locale in
# ONE layer, so `apt-get update` is never cached apart from the installs and
# the cleanup happens in the layer that created the files.
# The former `apt-get dist-upgrade` (without -y) was dropped: it aborts a
# non-interactive build as soon as any upgrade is pending, so every build
# that succeeded did so without upgrading anything.
# ca-certificates is needed because the downloads use HTTPS.
# NOTE(review): redis-stable.tar.gz is a moving target; pin a release and
# verify its checksum if reproducible builds are required.
RUN apt-get update \
 && apt-get install -y \
        build-essential \
        ca-certificates \
        libcurl4 \
        libssh-4 \
        locales \
        wget \
 && wget -q -O /tmp/redis-stable.tar.gz https://download.redis.io/redis-stable.tar.gz \
 && tar -xzf /tmp/redis-stable.tar.gz -C /tmp \
 && make -C /tmp/redis-stable \
 && install -m 755 /tmp/redis-stable/src/redis-cli /usr/local/bin/redis-cli \
 && rm -rf /tmp/redis-stable /tmp/redis-stable.tar.gz /var/lib/apt/lists/* \
 && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

# Install MySQL Shell from the upstream .deb. Installing the file through
# apt-get (instead of `dpkg -i`) lets apt resolve the package's dependencies;
# `dpkg -i` fails the build if any dependency is missing.
#RUN wget https://dev.mysql.com/get/Downloads/MySQL-Shell/mysql-shell_8.3.0-1debian12_amd64.deb
RUN apt-get update \
 && wget -q -O /tmp/mysql-shell.deb https://cdn.mysql.com//Downloads/MySQL-Shell/mysql-shell_8.0.36-1debian12_amd64.deb \
 && apt-get install -y /tmp/mysql-shell.deb \
 && rm -rf /tmp/mysql-shell.deb /var/lib/apt/lists/*

# Locale settings (the locale itself is generated by localedef above).
ENV LANG=en_US.utf8 \
    LC_ALL="en_US.UTF-8"

# Default parameters of the loader; all of them can be overridden at runtime.
# NOTE(review): BEXHOMA_CONNECTION defaults to "monetdb" although this is the
# MySQL loader - value kept unchanged, confirm it is intended.
ENV NUM_PODS=4 \
    CHILD=1 \
    BEXHOMA_HOST="www.example.com" \
    BEXHOMA_PORT=50000 \
    BEXHOMA_CONNECTION="monetdb" \
    BEXHOMA_EXPERIMENT="12345" \
    DATABASE=tpcds \
    STORE_RAW_DATA=0 \
    BEXHOMA_SYNCH_LOAD=0 \
    MYSQL_LOADING_THREADS=8 \
    MYSQL_LOADING_PARALLEL=1 \
    MYSQL_LOADING_FROM="LOCAL"

WORKDIR /tmp

RUN mkdir -p /tmp/tpcds

#COPY ./*.dat /tmp/

# Alternative loader script (one client imports the files of all pods):
#COPY ./loader-parallel.sh /tmp/loader.sh
COPY ./loader.sh /tmp/loader.sh
RUN ["chmod", "+x", "/tmp/loader.sh"]


CMD ["/bin/bash", "-c", "/tmp/loader.sh"]
# For debugging, keep the container alive instead of running the loader:
#CMD ["/bin/bash", "-c", "while true; do sleep 2; done"]
16 changes: 16 additions & 0 deletions images/tpcds/loader_mysql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Loader for TPC-DS data into MySQL

The following parameters (ENV) have been added:

* `NUM_PODS`:
* `CHILD`:
* `BEXHOMA_HOST`:
* `BEXHOMA_PORT`:
* `BEXHOMA_CONNECTION`:
* `BEXHOMA_EXPERIMENT`:
* `DATABASE`:
* `STORE_RAW_DATA`:
* `BEXHOMA_SYNCH_LOAD`:
* `BEXHOMA_USER`:

This folder contains the Dockerfile for a loader that loads data into MySQL via `mysqlsh`.
202 changes: 202 additions & 0 deletions images/tpcds/loader_mysql/loader-parallel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#!/bin/bash
# Parallel loader: imports generated *tbl* files into MySQL via mysqlsh.
# This first part prints the parameters and determines (and enters) the
# directory that holds the raw data files.
# NOTE(review): all paths and the schema refer to "tpch" although this script
# lives in the TPC-DS loader folder - confirm before using it for TPC-DS.

######################## Fix missing locale ########################
export LC_ALL="en_US.UTF-8"

######################## Start timing ########################
DATEANDTIME=$(date '+%d.%m.%Y %H:%M:%S');
echo "NOW: $DATEANDTIME"
SECONDS_START_SCRIPT=$SECONDS

######################## Show general parameters ########################
echo "BEXHOMA_CONNECTION:$BEXHOMA_CONNECTION"
echo "BEXHOMA_EXPERIMENT_RUN:$BEXHOMA_EXPERIMENT_RUN"
echo "BEXHOMA_CONFIGURATION:$BEXHOMA_CONFIGURATION"
echo "BEXHOMA_CLIENT:$BEXHOMA_CLIENT"

######################## Show more parameters ########################
# The number of this pod is read from a file; if the file is missing, the
# value of the CHILD environment variable is kept instead of being blanked.
# presumably the file is written by a preceding step - verify against caller
if [ -r /tmp/tpch/CHILD ]
then
    CHILD=$(cat /tmp/tpch/CHILD)
else
    echo "/tmp/tpch/CHILD not readable, keeping CHILD=$CHILD" >&2
fi
echo "CHILD $CHILD"
echo "NUM_PODS $NUM_PODS"
echo "SF $SF"

######################## Destination of raw data ########################
# Operands are quoted and given defaults so that unset variables select the
# same branch as before without `test` printing an error.
if test "${STORE_RAW_DATA:-0}" -gt 0
then
    # store in (distributed) file system
    if test "${NUM_PODS:-1}" -gt 1
    then
        destination_raw=/data/tpch/SF$SF/$NUM_PODS/$CHILD
    else
        destination_raw=/data/tpch/SF$SF
    fi
else
    # only store locally
    destination_raw=/tmp/tpch/SF$SF/$NUM_PODS/$CHILD
    mkdir -p "$destination_raw"
fi
echo "destination_raw $destination_raw"
# Without the data directory nothing can be loaded; stop here instead of
# continuing in the wrong directory and reporting a successful (empty) load.
cd "$destination_raw" || { echo "Cannot change into $destination_raw" >&2; exit 1; }

######################## Show generated files ########################
# List the raw data files found in the destination directory.
echo "Found these files:"
ls $destination_raw/*tbl* -lh

######################## Wait until all pods of job are ready ########################
# With BEXHOMA_SYNCH_LOAD > 0, register this pod in a redis counter and poll
# once per second until the counter equals NUM_PODS.
if [ $BEXHOMA_SYNCH_LOAD -gt 0 ]; then
    echo "Querying counter bexhoma-loader-podcount-$BEXHOMA_CONNECTION-$BEXHOMA_EXPERIMENT"
    # register this pod
    redis-cli -h 'bexhoma-messagequeue' incr "bexhoma-loader-podcount-$BEXHOMA_CONNECTION-$BEXHOMA_EXPERIMENT"
    # poll until every expected pod has registered
    while true
    do
        PODS_RUNNING="$(redis-cli -h 'bexhoma-messagequeue' get bexhoma-loader-podcount-$BEXHOMA_CONNECTION-$BEXHOMA_EXPERIMENT)"
        echo "Found $PODS_RUNNING / $NUM_PODS running pods"
        if ! test "$PODS_RUNNING" == $NUM_PODS; then
            echo "We have to wait"
            sleep 1
            continue
        fi
        echo "OK"
        break
    done
fi

######################## Start measurement of time ########################
# Remember wall clock (epoch) and bash's SECONDS counter at loading start.
bexhoma_start_epoch=$(date -u +%s)
SECONDS_START=$SECONDS
printf 'Start %s seconds\n' "$SECONDS_START"

######################## Only first loader pod should be active ########################
# In parallel mode a single client imports the files of all pods, so every
# pod with CHILD > 1 only prints its timing information and exits.
if test $MYSQL_LOADING_PARALLEL -gt 0 && test $CHILD -gt 1; then
    printf '%s\n' "Only first loader pod should be active"
    bexhoma_end_epoch=$(date -u +%s)
    SECONDS_END=$SECONDS
    printf 'End %s seconds\n' "$SECONDS_END"

    DURATION=$(( SECONDS_END - SECONDS_START ))
    printf 'Duration %s seconds\n' "$DURATION"

    ######################## Show timing information ###################
    printf '%s\n' "Loading done"

    DATEANDTIME=$(date '+%d.%m.%Y %H:%M:%S')
    printf 'NOW: %s\n' "$DATEANDTIME"

    SECONDS_END_SCRIPT=$SECONDS
    DURATION_SCRIPT=$(( SECONDS_END_SCRIPT - SECONDS_START_SCRIPT ))
    printf 'Duration %s seconds (script total)\n' "$DURATION_SCRIPT"
    printf 'BEXHOMA_DURATION:%s\n' "$DURATION_SCRIPT"
    printf 'BEXHOMA_START:%s\n' "$bexhoma_start_epoch"
    printf 'BEXHOMA_END:%s\n' "$bexhoma_end_epoch"
    exit 0
fi

######################## Execute loading ###################
# Imports every *tbl* file of the current directory (in random order) with
# the MySQL Shell utility util.import_table. "nation" and "region" are
# imported from this pod's single file; every other table is imported from
# the per-pod files of all NUM_PODS pods in one call.
# NOTE(review): the schema name 'tpch' and the password are hard-coded.

# Thread count for the single-file imports. These used $THREADS, which is not
# set by the image and produced an invalid command when empty; fall back to
# MYSQL_LOADING_THREADS, which the multi-file imports already use.
SINGLE_FILE_THREADS=${THREADS:-$MYSQL_LOADING_THREADS}

# Prints the option dictionary of util.import_table for table $1 with $2 threads.
import_options() {
    echo "{'schema': 'tpch', 'table': '$1', 'dialect': 'csv-unix', 'skipRows': 0, 'showProgress': True, 'fieldsTerminatedBy': '|', 'threads': $2}"
}

# ordered
#for i in *tbl*; do
# shuffled
for i in $(ls *tbl* | shuf); do
    basename=${i%.tbl*}
    wordcount=($(wc -l $i))
    lines=${wordcount[0]}
    if [[ $basename == "nation" || $basename == "region" ]]
    then
        COMMAND="util.import_table('$destination_raw/$i', $(import_options "$basename" "$SINGLE_FILE_THREADS"))"
    else
        # collect the file of every pod: <destination>/../<pod>/<table>.tbl.<pod>
        FILES=""
        for ((j=1;j<=NUM_PODS;j++));
        do
            FILES="$FILES'$destination_raw/../$j/$basename.tbl.$j',"
        done
        # ${FILES%,} drops the trailing comma of the list
        COMMAND="util.import_table([${FILES%,}], $(import_options "$basename" "$MYSQL_LOADING_THREADS"))"
    fi
    echo "============================"
    echo "$COMMAND"

    #FAILED=0 # everything ok
    #FAILED=1 # known error (import is repeated)
    #FAILED=2 # unknown error (import is not repeated)
    FAILED=1
    while [ $FAILED == 1 ]
    do
        # Separate variables for the per-import timing: SECONDS_START holds the
        # start of the whole loading phase and is read again after this loop.
        IMPORT_SECONDS_START=$SECONDS
        echo "=========="
        #time mysqlsh --sql --password=root --host $BEXHOMA_HOST --database $DATABASE --port $BEXHOMA_PORT -e "$COMMAND" &>OUTPUT.txt
        time mysqlsh --python --password=root --host "$BEXHOMA_HOST" --database "$DATABASE" --port "$BEXHOMA_PORT" -e "$COMMAND" &>OUTPUT.txt
        MYSQLSH_STATUS=$?
        echo "Start $IMPORT_SECONDS_START seconds"
        IMPORT_SECONDS_END=$SECONDS
        echo "End $IMPORT_SECONDS_END seconds"
        DURATION=$((IMPORT_SECONDS_END-IMPORT_SECONDS_START))
        echo "Duration $DURATION seconds"
        OUTPUT=$(cat OUTPUT.txt )
        echo "$OUTPUT"
        # Assume success unless mysqlsh reported an error via its exit status.
        # presumably mysqlsh exits non-zero when the -e command fails - TODO confirm
        FAILED=0
        if [[ $MYSQLSH_STATUS -ne 0 ]]; then FAILED=2; fi
        # everything worked well ("row" and "rows" string checked)
        if [[ $OUTPUT == *"$lines affected row"* ]]; then echo "Import ok"; FAILED=0; fi
        # rollback, we have to do it again (?)
        if [[ $OUTPUT == *"ROLLBACK"* ]]; then echo "ROLLBACK occured"; FAILED=1; fi
        # no thread left, we have to do it again (?)
        #if [[ $OUTPUT == *"failed to start worker thread"* ]]; then echo "No worker thread"; FAILED=1; fi
        #if [[ $OUTPUT == *"failed to start producer thread"* ]]; then echo "No producer thread"; FAILED=1; fi
        #if [[ $OUTPUT == *"Challenge string is not valid, it is empty"* ]]; then echo "No Login possible"; FAILED=1; fi
        # something else - what? (this used to test $OUTPUT instead of $FAILED)
        if [[ $FAILED == 2 ]]; then echo "Something unexpected happend"; fi
        echo "FAILED = $FAILED at $basename"
        # only a known error is retried, so only then wait
        if [[ $FAILED == 1 ]]; then echo "Wait 1s before retrying"; sleep 1; fi
    done
done

######################## End measurement of time ########################
# Take the end timestamps and report the time elapsed since SECONDS_START.
bexhoma_end_epoch=$(date -u +%s)
SECONDS_END=$SECONDS
printf 'End %s seconds\n' "$SECONDS_END"

DURATION=$(( SECONDS_END - SECONDS_START ))
printf 'Duration %s seconds\n' "$DURATION"

######################## Show timing information ###################
# The BEXHOMA_* lines report the total script duration and the start/end epochs.
printf '%s\n' "Loading done"

DATEANDTIME=$(date '+%d.%m.%Y %H:%M:%S')
printf 'NOW: %s\n' "$DATEANDTIME"

SECONDS_END_SCRIPT=$SECONDS
DURATION_SCRIPT=$(( SECONDS_END_SCRIPT - SECONDS_START_SCRIPT ))
printf 'Duration %s seconds (script total)\n' "$DURATION_SCRIPT"
printf 'BEXHOMA_DURATION:%s\n' "$DURATION_SCRIPT"
printf 'BEXHOMA_START:%s\n' "$bexhoma_start_epoch"
printf 'BEXHOMA_END:%s\n' "$bexhoma_end_epoch"

######################## Exit successfully ###################
# while true; do sleep 2; done
exit 0
Loading

0 comments on commit 7f0079b

Please sign in to comment.