From 82adf5b273f4d98bee0a13f92a1e0b3381ef0069 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 22:21:08 +0530 Subject: [PATCH 01/22] Upgraded to python3.x version support --- .pre-commit-config.yaml | 57 ++ README.md | 26 +- bin/functions/execute_with_log.py | 133 ++- bin/functions/hibench_prop_env_mapping.py | 37 +- bin/functions/load_config.py | 528 +++++++---- bin/functions/monitor.py | 1050 ++++++++++++++------- bin/functions/monitor_replot.py | 14 +- bin/functions/terminalsize.py | 77 +- bin/functions/test_load_config.py | 218 +++-- bin/report_gen_plot.py | 225 +++-- 10 files changed, 1581 insertions(+), 784 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..740dcf014 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,57 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: debug-statements + # - id: double-quote-string-fixer + - id: name-tests-test + # - id: requirements-txt-fixer + +- repo: https://github.com/asottile/reorder_python_imports + rev: v3.8.2 + hooks: + - id: reorder-python-imports + args: [--py38-plus, --add-import, 'from __future__ import annotations'] + +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.6.0 + hooks: + - id: autopep8 + +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.2.3 + hooks: + - id: add-trailing-comma + args: [--py36-plus] + +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.2 + hooks: + - id: pyupgrade + args: [--py38-plus] + + +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v0.961 +# hooks: +# - id: mypy + +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort + +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black diff --git a/README.md b/README.md index ad8e4046a..44624d5fb 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ There are totally 29 workloads in HiBench. The workloads are divided into 6 cate 3. TeraSort (terasort) TeraSort is a standard benchmark created by Jim Gray. Its input data is generated by Hadoop TeraGen example program. - + 4. Repartition (micro/repartition) - + This workload benchmarks shuffle performance. Input data is generated by Hadoop TeraGen. The workload randomly selects the post-shuffle partition for each record, performs shuffle write and read, evenly repartitioning the records. There are 2 parameters providing options to eliminate data source & sink I/Os: hibench.repartition.cacheinmemory(default: false) and hibench.repartition.disableOutput(default: false), controlling whether or not to 1) cache the input in memory at first 2) write the result to storage 5. Sleep (sleep) @@ -61,10 +61,10 @@ There are totally 29 workloads in HiBench. The workloads are divided into 6 cate 2. K-means clustering (Kmeans) This workload tests the K-means (a well-known clustering algorithm for knowledge discovery and data mining) clustering in spark.mllib. The input data set is generated by GenKMeansDataset based on Uniform Distribution and Guassian Distribution. 
There is also an optimized K-means implementation based on DAL (Intel Data Analytics Library), which is available in the dal module of sparkbench. - -3. Gaussian Mixture Model (GMM) - Gaussian Mixture Model represents a composite distribution whereby points are drawn from one of k Gaussian sub-distributions, each with its own probability. It's implemented in spark.mllib. The input data set is generated by GenKMeansDataset based on Uniform Distribution and Guassian Distribution. +3. Gaussian Mixture Model (GMM) + + Gaussian Mixture Model represents a composite distribution whereby points are drawn from one of k Gaussian sub-distributions, each with its own probability. It's implemented in spark.mllib. The input data set is generated by GenKMeansDataset based on Uniform Distribution and Guassian Distribution. 4. Logistic Regression (LR) @@ -80,7 +80,7 @@ There are totally 29 workloads in HiBench. The workloads are divided into 6 cate 7. XGBoost (XGBoost) - XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible and portable. This workload is implemented with XGBoost4J-Spark API in spark.mllib and the input data set is generated by GradientBoostedTreeDataGenerator. + XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible and portable. This workload is implemented with XGBoost4J-Spark API in spark.mllib and the input data set is generated by GradientBoostedTreeDataGenerator. 8. Linear Regression (Linear) @@ -125,9 +125,9 @@ There are totally 29 workloads in HiBench. The workloads are divided into 6 cate **Graph Benchmark:** -1. NWeight (nweight) +1. NWeight (nweight) - NWeight is an iterative graph-parallel algorithm implemented by Spark GraphX and pregel. The algorithm computes associations between two vertices that are n-hop away. + NWeight is an iterative graph-parallel algorithm implemented by Spark GraphX and pregel. The algorithm computes associations between two vertices that are n-hop away. **Streaming Benchmarks:** @@ -139,16 +139,16 @@ There are totally 29 workloads in HiBench. The workloads are divided into 6 cate 2. Repartition (streaming/repartition) This workload reads input data from Kafka and changes the level of parallelism by creating more or fewer partitions. It tests the efficiency of data shuffle in the streaming frameworks. - + 3. Stateful Wordcount (wordcount) This workload counts words cumulatively received from Kafka every few seconds. This tests the stateful operator performance and Checkpoint/Acker cost in the streaming frameworks. - + 4. Fixwindow (fixwindow) The workloads performs a window based aggregation. It tests the performance of window operation in the streaming frameworks. - - + + ### Supported Hadoop/Spark/Flink/Storm/Gearpump releases: ### - Hadoop: Apache Hadoop 3.0.x, 3.1.x, 3.2.x, 2.x, CDH5, HDP @@ -159,5 +159,3 @@ There are totally 29 workloads in HiBench. The workloads are divided into 6 cate - Kafka: 0.8.2.2 --- - - diff --git a/bin/functions/execute_with_log.py b/bin/functions/execute_with_log.py index 53ebd4105..d9b7f8076 100755 --- a/bin/functions/execute_with_log.py +++ b/bin/functions/execute_with_log.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. 
@@ -13,51 +13,78 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations -import sys, os, subprocess -from terminalsize import get_terminal_size -from time import time, sleep -import re import fnmatch +import os +import re +import subprocess +import sys +from time import sleep +from time import time + +from terminalsize import get_terminal_size + def load_colors(): - color_script_fn = os.path.join(os.path.dirname(__file__), "color.enabled.sh") + color_script_fn = os.path.join( + os.path.dirname(__file__), + "color.enabled.sh", + ) with open(color_script_fn) as f: - return dict([(k,v.split("'")[1].replace('\e[', "\033[")) for k,v in [x.strip().split('=') for x in f.readlines() if x.strip() and not x.strip().startswith('#')]]) + return { + k: v.split("'")[1].replace(r"\e[", "\033[") + for k, v in [ + x.strip().split("=") + for x in f.readlines() + if x.strip() and not x.strip().startswith("#") + ] + } # noqa: E501 + -Color=load_colors() +Color = load_colors() if int(os.environ.get("HIBENCH_PRINTFULLLOG", 0)): - Color['ret'] = os.linesep + Color["ret"] = os.linesep else: - Color['ret']='\r' + Color["ret"] = "\r" tab_matcher = re.compile("\t") tabstop = 8 + + def replace_tab_to_space(s): def tab_replacer(match): pos = match.start() length = pos % tabstop - if not length: length += tabstop + if not length: + length += tabstop return " " * length + return tab_matcher.sub(tab_replacer, s) + class _Matcher: hadoop = re.compile(r"^.*map\s*=\s*(\d+)%,\s*reduce\s*=\s*(\d+)%.*$") hadoop2 = re.compile(r"^.*map\s+\s*(\d+)%\s+reduce\s+\s*(\d+)%.*$") - spark = re.compile(r"^.*finished task \S+ in stage \S+ \(tid \S+\) in.*on.*\((\d+)/(\d+)\)\s*$") + spark = re.compile( + r"^.*finished task \S+ in stage \S+ \(tid \S+\) in.*on.*\((\d+)/(\d+)\)\s*$", # noqa: E501 + ) + def match(self, line): for p in [self.hadoop, self.hadoop2]: m = p.match(line) if m: - return (float(m.groups()[0]) + float(m.groups()[1]))/2 + return (float(m.groups()[0]) + float(m.groups()[1])) / 2 for p in [self.spark]: m = p.match(line) if m: return float(m.groups()[0]) / float(m.groups()[1]) * 100 - + + matcher = _Matcher() + def show_with_progress_bar(line, progress, line_width): """ Show text with progress bar. 
@@ -70,33 +97,45 @@ def show_with_progress_bar(line, progress, line_width): if len(line) < line_width: line = line + " " * (line_width - len(line)) line = "{On_Yellow}{line_seg1}{On_Blue}{line_seg2}{Color_Off}{ret}".format( - line_seg1 = line[:pos], line_seg2 = line[pos:], **Color) + line_seg1=line[:pos], + line_seg2=line[pos:], + **Color, + ) sys.stdout.write(line) + def execute(workload_result_file, command_lines): - proc = subprocess.Popen(" ".join(command_lines), shell=True, bufsize=1, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + proc = subprocess.Popen( + " ".join(command_lines), + shell=True, + bufsize=1, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) count = 100 - last_time=0 - log_file = open(workload_result_file, 'w') + last_time = 0 + log_file = open(workload_result_file, "w") # see http://stackoverflow.com/a/4417735/1442961 lines_iterator = iter(proc.stdout.readline, b"") for line in lines_iterator: count += 1 - if count > 100 or time()-last_time>1: # refresh terminal size for 100 lines or each seconds + if ( + count > 100 or time() - last_time > 1 + ): # refresh terminal size for 100 lines or each seconds count, last_time = 0, time() width, height = get_terminal_size() width -= 1 try: line = line.rstrip() - log_file.write(line+"\n") + log_file.write(line + "\n") log_file.flush() except KeyboardInterrupt: proc.terminate() break - line = line.decode('utf-8') + line = line.decode("utf-8") line = replace_tab_to_space(line) - #print "{Red}log=>{Color_Off}".format(**Color), line + # print("{Red}log=>{Color_Off}".format(**Color), line) lline = line.lower() def table_not_found_in_log(line): @@ -116,29 +155,49 @@ def database_default_exist_in_log(line): return False def uri_with_key_not_found_in_log(line): - uri_with_key_not_found = "Could not find uri with key [dfs.encryption.key.provider.uri]" + uri_with_key_not_found = ( + "Could not find uri with key [dfs.encryption.key.provider.uri]" + ) if uri_with_key_not_found in line: return True else: return False - if ('error' in lline) and lline.lstrip() == lline: - #Bypass hive 'error's and KeyProviderCache error - bypass_error_condition = table_not_found_in_log or database_default_exist_in_log(lline) or uri_with_key_not_found_in_log(lline) + if ("error" in lline) and lline.lstrip() == lline: + # Bypass hive 'error's and KeyProviderCache error + bypass_error_condition = ( + table_not_found_in_log + or database_default_exist_in_log( + lline, + ) + or uri_with_key_not_found_in_log(lline) + ) if not bypass_error_condition: COLOR = "Red" - sys.stdout.write((u"{%s}{line}{Color_Off}{ClearEnd}\n" % COLOR).format(line=line,**Color).encode('utf-8')) - + sys.stdout.write( + ("{%s}{line}{Color_Off}{ClearEnd}\n" % COLOR) + .format( + line=line, + **Color, + ) + .encode("utf-8"), + ) + else: if len(line) >= width: - line = line[:width-4]+'...' + line = line[: width - 4] + "..." 
progress = matcher.match(lline) if progress is not None: show_with_progress_bar(line, progress, width) else: - sys.stdout.write(u"{line}{ClearEnd}{ret}".format(line=line, **Color).encode('utf-8')) + sys.stdout.write( + "{line}{ClearEnd}{ret}".format( + line=line, + **Color, + ).encode("utf-8"), + ) sys.stdout.flush() - print + print() log_file.close() try: proc.wait() @@ -147,6 +206,7 @@ def uri_with_key_not_found_in_log(line): return 1 return proc.returncode + def test_progress_bar(): for i in range(101): show_with_progress_bar("test progress : %d" % i, i, 80) @@ -154,7 +214,12 @@ def test_progress_bar(): sleep(0.05) -if __name__=="__main__": - sys.exit(execute(workload_result_file=sys.argv[1], - command_lines=sys.argv[2:])) + +if __name__ == "__main__": + sys.exit( + execute( + workload_result_file=sys.argv[1], + command_lines=sys.argv[2:], + ), + ) # test_progress_bar() diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py index 49a9c2381..ecca09fdd 100644 --- a/bin/functions/hibench_prop_env_mapping.py +++ b/bin/functions/hibench_prop_env_mapping.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -13,11 +13,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Mapping from properties to environment variable names """ -HiBenchEnvPropMappingMandatory=dict( +from __future__ import annotations + +HiBenchEnvPropMappingMandatory = dict( JAVA_BIN="java.bin", HADOOP_HOME="hibench.hadoop.home", HDFS_MASTER="hibench.hdfs.master", @@ -27,26 +28,22 @@ HADOOP_CONF_DIR="hibench.hadoop.configure.dir", HIBENCH_HOME="hibench.home", HIBENCH_CONF="hibench.configure.dir", - REPORT_COLUMN_FORMATS="hibench.report.formats", SPARKBENCH_JAR="hibench.sparkbench.jar", NUM_MAPS="hibench.default.map.parallelism", NUM_REDS="hibench.default.shuffle.parallelism", INPUT_HDFS="hibench.workload.input", OUTPUT_HDFS="hibench.workload.output", - REDUCER_CONFIG_NAME="hibench.hadoop.reducer.name", MAP_CONFIG_NAME="hibench.hadoop.mapper.name", - MASTERS="hibench.masters.hostnames", SLAVES="hibench.slaves.hostnames", - ) +) -HiBenchEnvPropMapping=dict( +HiBenchEnvPropMapping = dict( SPARK_HOME="hibench.spark.home", SPARK_MASTER="hibench.spark.master", SPARK_EXAMPLES_JAR="hibench.spark.examples.jar", - HIVE_HOME="hibench.hive.home", HIVE_RELEASE="hibench.hive.release", HIVEBENCH_TEMPLATE="hibench.hivebench.template.dir", @@ -56,8 +53,8 @@ NUTCH_BASE_HDFS="hibench.nutch.base.hdfs", NUTCH_INPUT="hibench.nutch.dir.name.input", NUTCH_DIR="hibench.nutch.nutchindexing.dir", - HIBENCH_REPORT="hibench.report.dir", # set in default - HIBENCH_REPORT_NAME="hibench.report.name", # set in default + HIBENCH_REPORT="hibench.report.dir", # set in default + HIBENCH_REPORT_NAME="hibench.report.name", # set in default YARN_NUM_EXECUTORS="hibench.yarn.executor.num", YARN_EXECUTOR_CORES="hibench.yarn.executor.cores", SPARK_YARN_EXECUTOR_MEMORY="spark.executor.memory", @@ -73,7 +70,6 @@ CACHE_IN_MEMORY="hibench.repartition.cacheinmemory", DISABLE_OUTPUT="hibench.repartition.disableoutput", FROM_HDFS="hibench.repartition.fromhdfs", - # For hive related workload, data scale PAGES="hibench.workload.pages", USERVISITS="hibench.workload.uservisits", @@ 
-133,7 +129,7 @@ NUM_EXAMPLES_PCA="hibench.pca.examples", NUM_FEATURES_PCA="hibench.pca.features", PCA_K="hibench.pca.k", - MAX_RESULT_SIZE_PCA ="hibench.pca.maxresultsize", + MAX_RESULT_SIZE_PCA="hibench.pca.maxresultsize", # For Gradient Boosting Tree NUM_EXAMPLES_GBT="hibench.gbt.examples", NUM_FEATURES_GBT="hibench.gbt.features", @@ -211,11 +207,9 @@ STORAGE_LEVEL="hibench.nweight.storage_level", DISABLE_KRYO="hibench.nweight.disable_kryo", MODEL="hibench.nweight.model", - # For streaming bench STREAMING_TESTCASE="hibench.streambench.testCase", COMMON_JAR="hibench.common.jar", - # prepare STREAMING_TOPIC_NAME="hibench.streambench.kafka.topic", STREAMING_KAFKA_HOME="hibench.streambench.kafka.home", @@ -229,28 +223,25 @@ STREAMING_DATA2_CLUSTER_DIR="hibench.streambench.datagen.data2_cluster.dir", STREAMING_PARTITIONS="hibench.streambench.kafka.topicPartitions", DATA_GEN_JAR="hibench.streambench.datagen.jar", - # metrics reader METRICE_READER_SAMPLE_NUM="hibench.streambench.metricsReader.sampleNum", METRICS_READER_THREAD_NUM="hibench.streambench.metricsReader.threadNum", METRICS_READER_OUTPUT_DIR="hibench.streambench.metricsReader.outputDir", - # sparkstreaming STREAMBENCH_SPARK_JAR="hibench.streambench.sparkbench.jar", STREAMBENCH_STORM_JAR="hibench.streambench.stormbench.jar", - # gearpump GEARPUMP_HOME="hibench.streambench.gearpump.home", STREAMBENCH_GEARPUMP_JAR="hibench.streambench.gearpump.jar", STREAMBENCH_GEARPUMP_EXECUTORS="hibench.streambench.gearpump.executors", - # flinkstreaming HIBENCH_FLINK_MASTER="hibench.flink.master", FLINK_HOME="hibench.streambench.flink.home", STREAMBENCH_FLINK_JAR="hibench.streambench.flinkbench.jar", STREAMBENCH_FLINK_PARALLELISM="hibench.streambench.flink.parallelism", +) - ) - -HiBenchPropEnvMapping=dict([(v,k) for k, v in HiBenchEnvPropMapping.items()]) -HiBenchPropEnvMappingMandatory=dict([(v,k) for k, v in HiBenchEnvPropMappingMandatory.items()]) +HiBenchPropEnvMapping = {v: k for k, v in HiBenchEnvPropMapping.items()} +HiBenchPropEnvMappingMandatory = { + v: k for k, v in HiBenchEnvPropMappingMandatory.items() +} diff --git a/bin/functions/load_config.py b/bin/functions/load_config.py index 655e4ea27..99bf23134 100755 --- a/bin/functions/load_config.py +++ b/bin/functions/load_config.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -13,17 +13,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations -import sys -import os +import fcntl import glob +import os import re -import urllib import socket - -from contextlib import closing +import subprocess +import sys +import time +import urllib from collections import defaultdict -from hibench_prop_env_mapping import HiBenchEnvPropMappingMandatory, HiBenchEnvPropMapping +from contextlib import closing + +from hibench_prop_env_mapping import HiBenchEnvPropMapping +from hibench_prop_env_mapping import HiBenchEnvPropMappingMandatory HibenchConf = {} HibenchConfRef = {} @@ -36,19 +41,16 @@ def log(*s): s = s[0] else: s = " ".join([str(x) for x in s]) - sys.stderr.write(str(s) + '\n') + sys.stderr.write(str(s) + "\n") def log_debug(*s): # log(*s) pass + # copied from http://stackoverflow.com/questions/3575554/python-subprocess-with-timeout-and-large-output-64k # Comment: I have a better solution, but I'm too lazy to write. -import fcntl -import os -import subprocess -import time def nonBlockRead(output): @@ -58,7 +60,7 @@ def nonBlockRead(output): try: return output.read() except: - return '' + return "" def execute_cmd(cmdline, timeout): @@ -74,17 +76,18 @@ def execute_cmd(cmdline, timeout): bufsize=0, # default value of 0 (unbuffered) is best shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE + stderr=subprocess.PIPE, ) t_begin = time.time() # Monitor execution time seconds_passed = 0 - stdout = '' - stderr = '' + stdout = "" + stderr = "" while p.poll() is None and ( - seconds_passed < timeout or timeout == 0): # Monitor process + seconds_passed < timeout or timeout == 0 + ): # Monitor process time.sleep(0.1) # Wait a little seconds_passed = time.time() - t_begin @@ -95,21 +98,22 @@ def execute_cmd(cmdline, timeout): try: p.stdout.close() # If they are not closed the fds will hang around until p.stderr.close() # os.fdlimit is exceeded and cause a nasty exception - p.terminate() # Important to close the fds prior to terminating the process! + p.terminate() # Important to close the fds prior to terminating the process! # NOTE: Are there any other "non-freed" resources? except: pass - return ('Timeout', stdout, stderr) + return ("Timeout", stdout, stderr) return (p.returncode, stdout, stderr) def shell(cmd, timeout=5): assert not "${" in cmd, "Error, missing configurations: %s" % ", ".join( - re.findall("\$\{(.*)\}", cmd)) + re.findall(r"\$\{(.*)\}", cmd), + ) retcode, stdout, stderr = execute_cmd(cmd, timeout) - if retcode == 'Timeout': + if retcode == "Timeout": log("ERROR, execute cmd: '%s' timedout." 
% cmd) log(" STDOUT:\n" + stdout) log(" STDERR:\n" + stderr) @@ -124,7 +128,11 @@ def exactly_one_file(filename_candidate_list, config_name): result = exactly_one_file_one_candidate(filename_pattern) if result != "": return result - assert 0, "No files found under certain path(s), please set `" + config_name + "` manually" + assert 0, ( + "No files found under certain path(s), please set `" + + config_name + + "` manually" + ) def exactly_one_file_one_candidate(filename_pattern): @@ -134,20 +142,25 @@ def exactly_one_file_one_candidate(filename_pattern): elif len(files) == 1: return files[0] else: - assert 0, "The pattern " + filename_pattern + \ - " matches more than one file, please remove the redundant files" + assert 0, ( + "The pattern " + + filename_pattern + + " matches more than one file, please remove the redundant files" + ) def read_file_content(filepath): file_content = [] - if(len(glob.glob(filepath)) == 1): + if len(glob.glob(filepath)) == 1: with open(filepath) as f: file_content = f.readlines() return file_content def parse_conf(conf_root, workload_config_file): - conf_files = sorted(glob.glob(conf_root + "/*.conf")) + sorted(glob.glob(workload_config_file)) + conf_files = sorted(glob.glob(conf_root + "/*.conf")) + sorted( + glob.glob(workload_config_file), + ) # load values from conf files for filename in conf_files: @@ -157,10 +170,10 @@ def parse_conf(conf_root, workload_config_file): line = line.strip() if not line: continue # skip empty lines - if line[0] == '#': + if line[0] == "#": continue # skip comments try: - key, value = re.split("\s", line, 1) + key, value = re.split(r"\s", line, 1) except ValueError: key = line.strip() value = "" @@ -170,12 +183,18 @@ def parse_conf(conf_root, workload_config_file): def override_conf_from_environment(): # override values from os environment variable settings - for env_name, prop_name in HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items(): + for env_name, prop_name in ( + HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items() + ): # The overrides from environments has 2 premises, the second one is either # the prop_name is not set in advance by config files or the conf line # itself set an env variable to a hibench conf - if env_name in os.environ and (not HibenchConf.get( - prop_name) or HibenchConf.get(prop_name) == "$" + env_name): + if env_name in os.environ and ( + not HibenchConf.get( + prop_name, + ) + or HibenchConf.get(prop_name) == "$" + env_name + ): env_value = os.getenv(env_name) HibenchConf[prop_name] = env_value HibenchConfRef[prop_name] = "OS environment variable:%s" % env_name @@ -189,8 +208,8 @@ def override_conf_by_paching_conf(): # HibenchConf[prop_name] = env_value # HibenchConfRef[prop_name] = "OS environment variable:%s" % env_name # override values by patching config - for item in [x for x in patching_config.split(',') if x]: - key, value = re.split('=', item, 1) + for item in [x for x in patching_config.split(",") if x]: + key, value = re.split("=", item, 1) HibenchConf[key] = value.strip() HibenchConfRef[key] = "Overrided by parent script during calling: " + item @@ -219,30 +238,43 @@ def load_config(conf_root, workload_config_file, workload_folder, patching_confi waterfall_config(force=True) # check check_config() - #import pdb;pdb.set_trace() + # import pdb;pdb.set_trace() # Export config to file, let bash script to import as local variables. 
- print export_config(workload_name, framework_name) + print(export_config(workload_name, framework_name)) -def check_config(): # check configures +def check_config(): # check configures # Ensure mandatory configures are available for _, prop_name in HiBenchEnvPropMappingMandatory.items(): - assert HibenchConf.get( - prop_name, None) is not None, "Mandatory configure missing: %s" % prop_name + assert HibenchConf.get(prop_name, None) is not None, ( + "Mandatory configure missing: %s" % prop_name + ) # Ensure all ref values in configure has been expanded - for _, prop_name in HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items(): - assert "${" not in HibenchConf.get(prop_name, ""), "Unsolved ref key: %s. \n Defined at %s:\n Unsolved value:%s\n" % ( - prop_name, HibenchConfRef.get(prop_name, "unknown"), HibenchConf.get(prop_name, "unknown")) - - -def waterfall_config(force=False): # replace "${xxx}" to its values + for _, prop_name in ( + HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items() + ): + assert "${" not in HibenchConf.get( + prop_name, + "", + ), "Unsolved ref key: {}. \n Defined at {}:\n Unsolved value:{}\n".format( + prop_name, + HibenchConfRef.get( + prop_name, + "unknown", + ), + HibenchConf.get(prop_name, "unknown"), + ) + + +def waterfall_config(force=False): # replace "${xxx}" to its values no_value_sign = "___###NO_VALUE_SIGN###___" def process_replace(m): raw_key = m.groups()[0] -# key, default_value = (raw_key[2:-1].strip().split(":-") + [None])[:2] + # key, default_value = (raw_key[2:-1].strip().split(":-") + [None])[:2] key, spliter, default_value = ( - re.split("(:-|:_)", raw_key[2:-1].strip()) + [None, None])[:3] + re.split("(:-|:_)", raw_key[2:-1].strip()) + [None, None] + )[:3] log_debug( "key:", @@ -250,15 +282,15 @@ def process_replace(m): " value:", HibenchConf.get( key, - "RAWKEY:" + - raw_key), - "default value:" + - repr(default_value)) + "RAWKEY:" + raw_key, + ), + "default value:" + repr(default_value), + ) if force: if default_value is None: return HibenchConf.get(key) else: - if spliter == ':_' and not default_value: # no return + if spliter == ":_" and not default_value: # no return return no_value_sign return HibenchConf.get(key, default_value) else: @@ -270,7 +302,9 @@ def wildcard_replacement(key, value): if "*" in key: # we meet a wildcard replacement situation if len(key.split("*")) == len(value.split("*")): - key_searcher = re.compile("^" + "(.*)".join(key.split("*")) + "$") + key_searcher = re.compile( + "^" + "(.*)".join(key.split("*")) + "$", + ) matched_keys_to_remove = [] for k in HibenchConf.keys(): matched_keys = key_searcher.match(k) @@ -278,12 +312,17 @@ def wildcard_replacement(key, value): matched_keys_to_remove.append(k) if not "*" in k: splited_value = value.split("*") - new_key = splited_value[ - 0] + "".join([matched_keys.groups()[idx] + x for idx, x in enumerate(splited_value[1:])]) + new_key = splited_value[0] + "".join( + [ + matched_keys.groups()[idx] + x + for idx, x in enumerate(splited_value[1:]) + ], + ) HibenchConf[new_key] = HibenchConf[k] HibenchConfRef[ - new_key] = "Generated by wildcard rule: %s -> %s" % (key, value) + new_key + ] = f"Generated by wildcard rule: {key} -> {value}" for key in matched_keys_to_remove: del HibenchConf[key] return True @@ -292,7 +331,7 @@ def wildcard_replacement(key, value): return True return False - p = re.compile("(\$\{\s*[^\s^\$^\}]+\s*\})") + p = re.compile(r"(\$\{\s*[^\s^\$^\}]+\s*\})") wildcard_rules = [] finish = False @@ -305,7 +344,7 @@ def 
wildcard_replacement(key, value): key = p.sub(process_replace, key) value = p.sub(process_replace, value) if key != old_key: - #log_debug("update key:", key, old_key) + # log_debug("update key:", key, old_key) HibenchConf[key] = HibenchConf[old_key] del HibenchConf[old_key] finish = False @@ -314,7 +353,10 @@ def wildcard_replacement(key, value): HibenchConf[key] = value finish = False - wildcard_rules = [(key, HibenchConf[key]) for key in HibenchConf if "*" in key] + wildcard_rules = [ + (key, HibenchConf[key]) + for key in HibenchConf if "*" in key + ] # now, let's check wildcard replacement rules for key, value in wildcard_rules: # check if we found a rule like: aaa.*.ccc.*.ddd -> bbb.*.* @@ -328,9 +370,10 @@ def wildcard_replacement(key, value): # switch the order of two wildcards, something like the # first wildcard in key to match the second wildcard in # value. I just don't think it'll be needed. - if not wildcard_replacement(key, value): # not wildcard rules? re-add + # not wildcard rules? re-add + if not wildcard_replacement(key, value): HibenchConf[key] = value - if wildcard_rules: # need try again + if wildcard_rules: # need try again wildcard_rules = [] else: break @@ -345,17 +388,21 @@ def probe_java_bin(): # probe JAVA_HOME if not HibenchConf.get("java.bin", ""): # probe java bin - if os.environ.get('JAVA_HOME', ''): + if os.environ.get("JAVA_HOME", ""): # lookup in os environment - HibenchConf['java.bin'] = os.path.join(os.environ.get('JAVA_HOME'), "bin", "java") - HibenchConfRef['java.bin'] = "probed from os environment of JAVA_HOME" + HibenchConf["java.bin"] = os.path.join( + os.environ.get("JAVA_HOME"), + "bin", + "java", + ) + HibenchConfRef["java.bin"] = "probed from os environment of JAVA_HOME" else: # lookup in path - path_dirs = os.environ.get('PATH', '').split(':') + path_dirs = os.environ.get("PATH", "").split(":") for path in path_dirs: if os.path.isfile(os.path.join(path, "java")): - HibenchConf['java.bin'] = os.path.join(path, "java") - HibenchConfRef['java.bin'] = "probed by lookup in $PATH: " + path + HibenchConf["java.bin"] = os.path.join(path, "java") + HibenchConfRef["java.bin"] = "probed by lookup in $PATH: " + path break else: # still not found? @@ -365,58 +412,80 @@ def probe_java_bin(): def probe_hadoop_release(): # probe hadoop release. only support apache if not HibenchConf.get("hibench.hadoop.release", ""): - cmd_release_and_version = HibenchConf['hibench.hadoop.executable'] + ' version | head -1' + cmd_release_and_version = ( + HibenchConf["hibench.hadoop.executable"] + " version | head -1" + ) # version here means, for example apache hadoop {2.7.3} hadoop_release_and_version = shell(cmd_release_and_version).strip() - HibenchConf["hibench.hadoop.release"] = \ - "apache" if "Hadoop" in hadoop_release_and_version else \ - "UNKNOWN" - HibenchConfRef["hibench.hadoop.release"] = "Inferred by: hadoop executable, the path is:\"%s\"" % HibenchConf[ - 'hibench.hadoop.executable'] + HibenchConf["hibench.hadoop.release"] = ( + "apache" if "Hadoop" in hadoop_release_and_version else "UNKNOWN" + ) + HibenchConfRef["hibench.hadoop.release"] = ( + 'Inferred by: hadoop executable, the path is:"%s"' + % HibenchConf["hibench.hadoop.executable"] + ) + + assert HibenchConf["hibench.hadoop.release"] in [ + "apache", + ], "Unknown hadoop release. Auto probe failed, please override `hibench.hadoop.release` to explicitly define this property, only apache is supported" - assert HibenchConf["hibench.hadoop.release"] in ["apache"], "Unknown hadoop release. 
Auto probe failed, please override `hibench.hadoop.release` to explicitly define this property, only apache is supported" - def probe_hadoop_examples_jars(): # probe hadoop example jars if not HibenchConf.get("hibench.hadoop.examples.jar", ""): - examples_jars_candidate_apache0 = HibenchConf[ - 'hibench.hadoop.home'] + "/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar" + examples_jars_candidate_apache0 = ( + HibenchConf["hibench.hadoop.home"] + + "/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar" + ) examples_jars_candidate_list = [ - examples_jars_candidate_apache0 - ] + examples_jars_candidate_apache0, + ] HibenchConf["hibench.hadoop.examples.jar"] = exactly_one_file( - examples_jars_candidate_list, "hibench.hadoop.examples.jar") - HibenchConfRef["hibench.hadoop.examples.jar"] = "Inferred by " + \ - HibenchConf["hibench.hadoop.examples.jar"] + examples_jars_candidate_list, + "hibench.hadoop.examples.jar", + ) + HibenchConfRef["hibench.hadoop.examples.jar"] = ( + "Inferred by " + HibenchConf["hibench.hadoop.examples.jar"] + ) def probe_hadoop_examples_test_jars(): # probe hadoop examples test jars if not HibenchConf.get("hibench.hadoop.examples.test.jar", ""): - examples_test_jars_candidate_apache0 = HibenchConf[ - 'hibench.hadoop.home'] + "/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient*-tests.jar" + examples_test_jars_candidate_apache0 = ( + HibenchConf["hibench.hadoop.home"] + + "/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient*-tests.jar" + ) examples_test_jars_candidate_list = [ - examples_test_jars_candidate_apache0 - ] + examples_test_jars_candidate_apache0, + ] HibenchConf["hibench.hadoop.examples.test.jar"] = exactly_one_file( - examples_test_jars_candidate_list, "hibench.hadoop.examples.test.jar") - HibenchConfRef["hibench.hadoop.examples.test.jar"] = "Inferred by " + \ - HibenchConf["hibench.hadoop.examples.test.jar"] + examples_test_jars_candidate_list, + "hibench.hadoop.examples.test.jar", + ) + HibenchConfRef["hibench.hadoop.examples.test.jar"] = ( + "Inferred by " + HibenchConf["hibench.hadoop.examples.test.jar"] + ) def probe_sleep_job_jar(): # set hibench.sleep.job.jar - if not HibenchConf.get('hibench.sleep.job.jar', ''): - log("probe sleep jar:", HibenchConf['hibench.hadoop.examples.test.jar']) - HibenchConf["hibench.sleep.job.jar"] = HibenchConf['hibench.hadoop.examples.test.jar'] + if not HibenchConf.get("hibench.sleep.job.jar", ""): + log( + "probe sleep jar:", + HibenchConf["hibench.hadoop.examples.test.jar"], + ) + HibenchConf["hibench.sleep.job.jar"] = HibenchConf[ + "hibench.hadoop.examples.test.jar" + ] HibenchConfRef[ - "hibench.sleep.job.jar"] = "Refer to `hibench.hadoop.examples.test.jar` according to the evidence of `hibench.hadoop.release`" + "hibench.sleep.job.jar" + ] = "Refer to `hibench.hadoop.examples.test.jar` according to the evidence of `hibench.hadoop.release`" def probe_hadoop_configure_dir(): @@ -424,8 +493,13 @@ def probe_hadoop_configure_dir(): if not HibenchConf.get("hibench.hadoop.configure.dir", ""): # For Apache, HDP, and CDH release HibenchConf["hibench.hadoop.configure.dir"] = join( - HibenchConf["hibench.hadoop.home"], "etc", "hadoop") - HibenchConfRef["hibench.hadoop.configure.dir"] = "Inferred by: `hibench.hadoop.home`" + HibenchConf["hibench.hadoop.home"], + "etc", + "hadoop", + ) + HibenchConfRef[ + "hibench.hadoop.configure.dir" + ] = "Inferred by: `hibench.hadoop.home`" def probe_mapper_reducer_names(): @@ -447,12 +521,16 @@ def probe_spark_conf_value(conf_name, default_value): file_content = 
read_file_content(spark_env_file) for line in file_content: - if not line.strip().startswith( - "#") and conf_name in line: - if "\"" in line: - value = line.split("=")[1].split("\"")[1] - elif "\'" in line: - value = line.split("=")[1].split("\'")[1] + if ( + not line.strip().startswith( + "#", + ) + and conf_name in line + ): + if '"' in line: + value = line.split("=")[1].split('"')[1] + elif "'" in line: + value = line.split("=")[1].split("'")[1] else: value = line.split("=")[1] value = value.strip() @@ -468,36 +546,59 @@ def probe_spark_worker_webui_port(): def probe_masters_slaves_by_Yarn(): - yarn_executable = os.path.join(os.path.dirname( - HibenchConf['hibench.hadoop.executable']), "yarn") + yarn_executable = os.path.join( + os.path.dirname( + HibenchConf["hibench.hadoop.executable"], + ), + "yarn", + ) cmd = "( " + yarn_executable + " node -list 2> /dev/null | grep RUNNING )" try: - worker_hostnames = [line.split(":")[0] for line in shell(cmd).split("\n")] - HibenchConf['hibench.slaves.hostnames'] = " ".join(worker_hostnames) - HibenchConfRef['hibench.slaves.hostnames'] = "Probed by parsing results from: " + cmd + worker_hostnames = [ + line.split(":")[0] + for line in shell(cmd).split("\n") + ] + HibenchConf["hibench.slaves.hostnames"] = " ".join(worker_hostnames) + HibenchConfRef["hibench.slaves.hostnames"] = ( + "Probed by parsing results from: " + cmd + ) # parse yarn resource manager from hadoop conf - yarn_site_file = os.path.join(HibenchConf["hibench.hadoop.configure.dir"], "yarn-site.xml") + yarn_site_file = os.path.join( + HibenchConf["hibench.hadoop.configure.dir"], + "yarn-site.xml", + ) with open(yarn_site_file) as f: file_content = f.read() match_address = re.findall( - "\\s*\\s*yarn.resourcemanager.address[.\w\s]*\<\/name\>\s*\([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>", - file_content) + r"\\s*\\s*yarn.resourcemanager.address[.\w\s]*\<\/name\>\s*\([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>", + file_content, + ) match_hostname = re.findall( - "\\s*\\s*yarn.resourcemanager.hostname[.\w\s]*\<\/name\>\s*\([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>", - file_content) + r"\\s*\\s*yarn.resourcemanager.hostname[.\w\s]*\<\/name\>\s*\([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>", + file_content, + ) if match_address: resourcemanager_hostname = match_address[0][0] - HibenchConf['hibench.masters.hostnames'] = resourcemanager_hostname - HibenchConfRef['hibench.masters.hostnames'] = "Parsed from " + yarn_site_file + HibenchConf["hibench.masters.hostnames"] = resourcemanager_hostname + HibenchConfRef["hibench.masters.hostnames"] = ( + "Parsed from " + yarn_site_file + ) elif match_hostname: resourcemanager_hostname = match_hostname[0][0] - HibenchConf['hibench.masters.hostnames'] = resourcemanager_hostname - HibenchConfRef['hibench.masters.hostnames'] = "Parsed from " + yarn_site_file + HibenchConf["hibench.masters.hostnames"] = resourcemanager_hostname + HibenchConfRef["hibench.masters.hostnames"] = ( + "Parsed from " + yarn_site_file + ) else: - assert 0, "Unknown resourcemanager, please check `hibench.hadoop.configure.dir` and \"yarn-site.xml\" file" + assert ( + 0 + ), 'Unknown resourcemanager, please check `hibench.hadoop.configure.dir` and "yarn-site.xml" file' except Exception as e: - assert 0, "Get workers from yarn-site.xml page failed, reason:%s\nplease set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually" % e + assert 0, ( + "Get workers from yarn-site.xml page failed, reason:%s\nplease set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually" + % e + ) def 
probe_masters_slaves_hostnames(): @@ -507,63 +608,111 @@ def probe_masters_slaves_hostnames(): if not ( HibenchConf.get( "hibench.masters.hostnames", - "") and HibenchConf.get( + "", + ) + and HibenchConf.get( "hibench.slaves.hostnames", - "")): # no pre-defined hostnames, let's probe + "", + ) + ): # no pre-defined hostnames, let's probe if not (HibenchConf.get("hibench.spark.master", "")): probe_masters_slaves_by_Yarn() else: - spark_master = HibenchConf['hibench.spark.master'] + spark_master = HibenchConf["hibench.spark.master"] # local mode if spark_master.startswith("local"): - HibenchConf['hibench.masters.hostnames'] = '' # no master + HibenchConf["hibench.masters.hostnames"] = "" # no master # localhost as slaves - HibenchConf['hibench.slaves.hostnames'] = 'localhost' - HibenchConfRef['hibench.masters.hostnames'] = HibenchConfRef[ - 'hibench.slaves.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master + HibenchConf["hibench.slaves.hostnames"] = "localhost" + HibenchConfRef["hibench.masters.hostnames"] = HibenchConfRef[ + "hibench.slaves.hostnames" + ] = ( + "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master + ) # spark standalone mode elif spark_master.startswith("spark"): - HibenchConf['hibench.masters.hostnames'] = spark_master[8:].split(":")[0] - HibenchConfRef[ - 'hibench.masters.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master + HibenchConf["hibench.masters.hostnames"] = spark_master[8:].split(":")[ + 0 + ] + HibenchConfRef["hibench.masters.hostnames"] = ( + "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master + ) try: - log(spark_master, HibenchConf['hibench.masters.hostnames']) + log(spark_master, HibenchConf["hibench.masters.hostnames"]) master_port = probe_spark_master_webui_port() worker_port = probe_spark_worker_webui_port() # Make the assumption that the master is in internal network, and force # not to use any proxies - with closing(urllib.urlopen('http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port), proxies={})) as page: + with closing( + urllib.urlopen( + "http://{}:{}".format( + HibenchConf["hibench.masters.hostnames"], + master_port, + ), + proxies={}, + ), + ) as page: worker_hostnames = [] for x in page.readlines(): - matches = re.findall("http:\/\/([a-zA-Z\-\._0-9]+):%s" % worker_port, x) + matches = re.findall( + r"http:\/\/([a-zA-Z\-\._0-9]+):%s" % worker_port, + x, + ) if matches: worker_hostnames.append(matches[0]) - HibenchConf['hibench.slaves.hostnames'] = " ".join(worker_hostnames) - HibenchConfRef['hibench.slaves.hostnames'] = "Probed by parsing " + \ - 'http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port) + HibenchConf["hibench.slaves.hostnames"] = " ".join( + worker_hostnames, + ) + HibenchConfRef[ + "hibench.slaves.hostnames" + ] = "Probed by parsing " + "http://{}:{}".format( + HibenchConf["hibench.masters.hostnames"], + master_port, + ) except Exception as e: - assert 0, "Get workers from spark master's web UI page failed, \nPlease check your configurations, network settings, proxy settings, or set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually, master_port: %s, slave_port:%s" % ( - master_port, worker_port) + assert ( + 0 + ), "Get workers from spark master's web UI page failed, \nPlease check your configurations, network settings, proxy settings, or set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually, master_port: {}, slave_port:{}".format( + master_port, + worker_port, + ) 
# yarn mode elif spark_master.startswith("yarn"): probe_masters_slaves_by_Yarn() # reset hostnames according to gethostbyaddr - names = set(HibenchConf['hibench.masters.hostnames'].split() + - HibenchConf['hibench.slaves.hostnames'].split()) + names = set( + HibenchConf["hibench.masters.hostnames"].split() + + HibenchConf["hibench.slaves.hostnames"].split(), + ) new_name_mapping = {} for name in names: try: new_name_mapping[name] = socket.gethostbyaddr(name)[0] except: # host name lookup failure? new_name_mapping[name] = name - HibenchConf['hibench.masters.hostnames'] = repr(" ".join( - [new_name_mapping[x] for x in HibenchConf['hibench.masters.hostnames'].split()])) - HibenchConf['hibench.slaves.hostnames'] = repr(" ".join( - [new_name_mapping[x] for x in HibenchConf['hibench.slaves.hostnames'].split()])) + HibenchConf["hibench.masters.hostnames"] = repr( + " ".join( + [ + new_name_mapping[x] + for x in HibenchConf["hibench.masters.hostnames"].split() + ], + ), + ) + HibenchConf["hibench.slaves.hostnames"] = repr( + " ".join( + [ + new_name_mapping[x] + for x in HibenchConf["hibench.slaves.hostnames"].split() + ], + ), + ) def probe_java_opts(): - file_name = os.path.join(HibenchConf['hibench.hadoop.configure.dir'], 'mapred-site.xml') + file_name = os.path.join( + HibenchConf["hibench.hadoop.configure.dir"], + "mapred-site.xml", + ) cnt = 0 map_java_opts_line = "" reduce_java_opts_line = "" @@ -581,27 +730,40 @@ def probe_java_opts(): cnt += 1 def add_quotation_marks(line): - if not (line.startswith("'") or line.startswith("\"")): + if not (line.startswith("'") or line.startswith('"')): return repr(line) + if map_java_opts_line != "": - HibenchConf['hibench.dfsioe.map.java_opts'] = add_quotation_marks( - map_java_opts_line.split("<")[0].strip()) - HibenchConfRef['hibench.dfsioe.map.java_opts'] = "Probed by configuration file:'%s'" % os.path.join( - HibenchConf['hibench.hadoop.configure.dir'], 'mapred-site.xml') + HibenchConf["hibench.dfsioe.map.java_opts"] = add_quotation_marks( + map_java_opts_line.split("<")[0].strip(), + ) + HibenchConfRef[ + "hibench.dfsioe.map.java_opts" + ] = "Probed by configuration file:'%s'" % os.path.join( + HibenchConf["hibench.hadoop.configure.dir"], + "mapred-site.xml", + ) if reduce_java_opts_line != "": - HibenchConf['hibench.dfsioe.red.java_opts'] = add_quotation_marks( - reduce_java_opts_line.split("<")[0].strip()) - HibenchConfRef['hibench.dfsioe.red.java_opts'] = "Probed by configuration file:'%s'" % os.path.join( - HibenchConf['hibench.hadoop.configure.dir'], 'mapred-site.xml') + HibenchConf["hibench.dfsioe.red.java_opts"] = add_quotation_marks( + reduce_java_opts_line.split("<")[0].strip(), + ) + HibenchConfRef[ + "hibench.dfsioe.red.java_opts" + ] = "Probed by configuration file:'%s'" % os.path.join( + HibenchConf["hibench.hadoop.configure.dir"], + "mapred-site.xml", + ) def generate_optional_value(): # get some critical values from environment or make a guess d = os.path.dirname join = os.path.join - HibenchConf['hibench.home'] = d(d(d(os.path.abspath(__file__)))) + HibenchConf["hibench.home"] = d(d(d(os.path.abspath(__file__)))) del d - HibenchConfRef['hibench.home'] = "Inferred from relative path of dirname(%s)/../../" % __file__ + HibenchConfRef["hibench.home"] = ( + "Inferred from relative path of dirname(%s)/../../" % __file__ + ) probe_java_bin() probe_hadoop_release() @@ -616,8 +778,8 @@ def generate_optional_value(): def export_config(workload_name, framework_name): join = os.path.join - report_dir = HibenchConf['hibench.report.dir'] - 
conf_dir = join(report_dir, workload_name, framework_name, 'conf') + report_dir = HibenchConf["hibench.report.dir"] + conf_dir = join(report_dir, workload_name, framework_name, "conf") conf_filename = join(conf_dir, "%s.conf" % workload_name) spark_conf_dir = join(conf_dir, "sparkbench") @@ -631,17 +793,26 @@ def export_config(workload_name, framework_name): # generate configure for hibench sources = defaultdict(list) - for env_name, prop_name in HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items(): - source = HibenchConfRef.get(prop_name, 'None') - sources[source].append('%s=%s' % (env_name, HibenchConf.get(prop_name, ''))) - - with open(conf_filename, 'w') as f: + for env_name, prop_name in ( + HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items() + ): + source = HibenchConfRef.get(prop_name, "None") + sources[source].append( + "{}={}".format( + env_name, + HibenchConf.get(prop_name, ""), + ), + ) + + with open(conf_filename, "w") as f: for source in sorted(sources.keys()): f.write("# Source: %s\n" % source) f.write("\n".join(sorted(sources[source]))) f.write("\n\n") f.write("#Source: add for internal usage\n") - f.write("SPARKBENCH_PROPERTIES_FILES=%s\n" % sparkbench_prop_conf_filename) + f.write( + "SPARKBENCH_PROPERTIES_FILES=%s\n" % sparkbench_prop_conf_filename, + ) f.write("SPARK_PROP_CONF=%s\n" % spark_prop_conf_filename) f.write("WORKLOAD_RESULT_FOLDER=%s\n" % join(conf_dir, "..")) f.write("HIBENCH_WORKLOAD_CONF=%s\n" % conf_filename) @@ -651,10 +822,10 @@ def export_config(workload_name, framework_name): # generate properties for spark & sparkbench sources = defaultdict(list) for prop_name, prop_value in HibenchConf.items(): - source = HibenchConfRef.get(prop_name, 'None') - sources[source].append('%s\t%s' % (prop_name, prop_value)) + source = HibenchConfRef.get(prop_name, "None") + sources[source].append(f"{prop_name}\t{prop_value}") # generate configure for sparkbench - with open(spark_prop_conf_filename, 'w') as f: + with open(spark_prop_conf_filename, "w") as f: for source in sorted(sources.keys()): items = [x for x in sources[source] if x.startswith("spark.")] if items: @@ -662,10 +833,16 @@ def export_config(workload_name, framework_name): f.write("\n".join(sorted(items))) f.write("\n\n") # generate configure for spark - with open(sparkbench_prop_conf_filename, 'w') as f: + with open(sparkbench_prop_conf_filename, "w") as f: for source in sorted(sources.keys()): - items = [x for x in sources[source] if x.startswith( - "sparkbench.") or x.startswith("hibench.")] + items = [ + x + for x in sources[source] + if x.startswith( + "sparkbench.", + ) + or x.startswith("hibench.") + ] if items: f.write("# Source: %s\n" % source) f.write("\n".join(sorted(items))) @@ -673,13 +850,24 @@ def export_config(workload_name, framework_name): return conf_filename + if __name__ == "__main__": if len(sys.argv) < 4: raise Exception( - "Please supply , , [") - conf_root, workload_configFile, workload_folder = sys.argv[1], sys.argv[2], sys.argv[3] + "Please supply , , [", + ) + conf_root, workload_configFile, workload_folder = ( + sys.argv[1], + sys.argv[2], + sys.argv[3], + ) if len(sys.argv) > 4: patching_config = sys.argv[4] else: - patching_config = '' - load_config(conf_root, workload_configFile, workload_folder, patching_config) + patching_config = "" + load_config( + conf_root, + workload_configFile, + workload_folder, + patching_config, + ) diff --git a/bin/functions/monitor.py b/bin/functions/monitor.py index 53f2808a2..176742b96 100755 --- 
a/bin/functions/monitor.py +++ b/bin/functions/monitor.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -13,33 +13,50 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import threading, subprocess, re, os, sys, signal, socket -from time import sleep, time +from __future__ import annotations + +import os +import re +import signal +import socket +import subprocess +import sys +import threading +import traceback +from collections import namedtuple from contextlib import closing -import traceback, thread from datetime import datetime -from collections import namedtuple -from pprint import pprint from itertools import groupby +from pprint import pprint +from time import sleep +from time import time + +import thread # Probe intervals, in seconds. # Warning: a value too short may get wrong results due to lack of data when system load goes high. # and must be float! -PROBE_INTERVAL=float(5) +PROBE_INTERVAL = float(5) + +# FIXME: use log helper later +# log_lock = threading.Lock() + -#FIXME: use log helper later -#log_lock = threading.Lock() def log(*s): - if len(s)==1: s=s[0] - else: s= " ".join([str(x) for x in s]) -# with log_lock: -# with open("/home/zhihui/monitor_proc.log", 'a') as f: - log_str = str(thread.get_ident())+":"+str(s) +'\n' + if len(s) == 1: + s = s[0] + else: + s = " ".join([str(x) for x in s]) + # with log_lock: + # with open("/home/zhihui/monitor_proc.log", 'a') as f: + log_str = str(thread.get_ident()) + ":" + str(s) + "\n" # f.write( log_str ) sys.stderr.write(log_str) - -entered=False + + +entered = False + + def sig_term_handler(signo, stack): global entered global log_path @@ -49,42 +66,48 @@ def sig_term_handler(signo, stack): global na if not entered: - entered=True # FIXME: Not atomic - else: return + entered = True # FIXME: Not atomic + else: + return na.stop() generate_report(workload_title, log_path, bench_log_path, report_path) sys.exit(0) + def samedir(fn): """ return abspath of fn in the same directory where this python file stores """ return os.path.abspath(os.path.join(os.path.dirname(__file__), fn)) -class PatchedNameTuple(object): + +class PatchedNameTuple: def __sub__(self, other): assert isinstance(other, self.__class__) assert self[0] == other[0] cls = self.__class__ - return cls(self[0], *[a-b for a, b in zip(self[1:], other[1:])]) + return cls(self[0], *[a - b for a, b in zip(self[1:], other[1:])]) def __div__(self, other): - return self.__class__(self[0], *[a/other for a in self[1:]]) + return self.__class__(self[0], *[a / other for a in self[1:]]) def _add(self, other, override_title=None): - if other == None: return self + if other == None: + return self assert isinstance(other, self.__class__) cls = self.__class__ title = self[0] if not override_title else override_title - return cls(title, *[a+b for a, b in zip(self[1:], other[1:])]) + return cls(title, *[a + b for a, b in zip(self[1:], other[1:])]) + def ident(size, s): - return "\n".join((" "*size + x for x in s.split("\n"))) + return "\n".join(" " * size + x for x in s.split("\n")) + class RemoteProc(threading.Thread): - SEP="----SEP----" - template_debug=r"""exec(' + SEP = "----SEP----" + template_debug = r"""exec(' import time, os, sys, 
socket, traceback socket.setdefaulttimeout(1) def log(*x, **kw): @@ -101,7 +124,7 @@ def log(*x, **kw): while True: log("accepting") try: - print s.getsockname()[1] + print(s.getsockname()[1]) s2,peer=s.accept() break except socket.timeout: @@ -117,12 +140,12 @@ def log(*x, **kw): s2.send("{SEP}#end"+chr(10)) time.sleep({interval}) ')""" - template=r"""exec(' + template = r"""exec(' import time, os, sys, socket, traceback s=socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(("0.0.0.0",0)) s.listen(5) -print s.getsockname()[1] +print(s.getsockname()[1]) s2,peer=s.accept() {func_template} while True: @@ -137,55 +160,64 @@ def __init__(self, host, interval=1): self.cmds = [] self.interval = interval self.monitor_ins = {} - self.local_aggr_container={} - self._running=True + self.local_aggr_container = {} + self._running = True - super(RemoteProc, self).__init__() + super().__init__() def register(self, monitor_ins, cmds): assert isinstance(monitor_ins, BaseMonitor) - self.monitor_ins[len(self.cmds)] = monitor_ins # monitor command seq id => monitor instance + # monitor command seq id => monitor instance + self.monitor_ins[len(self.cmds)] = monitor_ins self.cmds.append(cmds) def run(self): - func_template = "\n".join(["def func_{id}():\n{func}"\ - .format(id=id, - func=ident(2, - func+'\ns2.send("{SEP}={id}"+chr(10))'\ - .format(SEP=self.SEP, id=id))) \ - for id, func in enumerate(self.cmds)]) - call_template="\n".join([" func_{id}()"\ - .format(id=id) for id in range(len(self.cmds))] - ) - script = self.template.format(func_template=func_template, - call_template=call_template, - interval = self.interval, - SEP = self.SEP) - - s = script.replace('"', r'\"').replace("\n", r"\n") - container=[] -# log("ssh client to:", self.host) - with self.ssh_client(self.host, "python -u -c \"{script}\"".format(script=s)) as f: -# log("ssh client %s connected" % self.host) + func_template = "\n".join( + [ + "def func_{id}():\n{func}".format( + id=id, + func=ident( + 2, + func + f'\ns2.send("{self.SEP}={id}"+chr(10))', + ), + ) + for id, func in enumerate(self.cmds) + ], + ) + call_template = "\n".join( + [f" func_{id}()" for id in range(len(self.cmds))], + ) + script = self.template.format( + func_template=func_template, + call_template=call_template, + interval=self.interval, + SEP=self.SEP, + ) + + s = script.replace('"', r"\"").replace("\n", r"\n") + container = [] + # log("ssh client to:", self.host) + with self.ssh_client(self.host, f'python -u -c "{s}"') as f: + # log("ssh client %s connected" % self.host) try: port_line = f.readline() -# log("host:", self.host, "got port,", port_line) + # log("host:", self.host, "got port,", port_line) port = int(port_line.rstrip()) - s=socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(0.5) - for i in range(30): # try to connect 30 times maximum + for i in range(30): # try to connect 30 times maximum try: -# log("try to connect:", self.host, port) + # log("try to connect:", self.host, port) s.connect((self.host, port)) -# log("connectted to:", self.host, port) + # log("connectted to:", self.host, port) break except socket.timeout: -# log("connecting to:", self.host, port, "timedout") + # log("connecting to:", self.host, port, "timedout") pass - else: # not connectted after 30 times trying -# log("cann't connectted to:", self.host, port) + else: # not connectted after 30 times trying + # log("cann't connectted to:", self.host, port) s.shutdown(socket.SHUT_RDWR) - self.ssh_close() + 
self.ssh_close() return s.settimeout(None) except Exception as e: @@ -197,19 +229,23 @@ def run(self): l = f2.readline() except KeyboardInterrupt: break - if not l: break + if not l: + break if l.startswith(self.SEP): tail = l.lstrip(self.SEP) - if tail[0]=='+': # timestamp + if tail[0] == "+": # timestamp remote_timestamp = float(tail[1:]) cur_timestamp = time() - elif tail.startswith('#end'): # end sign -# log("na push, timestamp:", cur_timestamp) + elif tail.startswith("#end"): # end sign + # log("na push, timestamp:", cur_timestamp) self.na_push(cur_timestamp) else: id = int(tail[1:]) if self.monitor_ins[id]: - self.monitor_ins[id].feed(container, cur_timestamp) + self.monitor_ins[id].feed( + container, + cur_timestamp, + ) container = [] else: container.append(l.rstrip()) @@ -217,15 +253,15 @@ def run(self): self.ssh_close() def stop(self): - self._running=False + self._running = False def aggregate(self, timestamp, data): if not self.local_aggr_container: - self.local_aggr_container['timestamp']=timestamp - assert timestamp == self.local_aggr_container['timestamp'] + self.local_aggr_container["timestamp"] = timestamp + assert timestamp == self.local_aggr_container["timestamp"] assert type(data) is dict self.local_aggr_container.update(data) - self.local_aggr_container['timestamp'] = timestamp + self.local_aggr_container["timestamp"] = timestamp def na_register(self, na): assert isinstance(na, NodeAggregator) @@ -233,46 +269,62 @@ def na_register(self, na): def na_push(self, timestamp): if self.local_aggr_container: - assert self.local_aggr_container.get('timestamp', -1) == timestamp - self.node_aggr_parent.commit_aggregate(self.host, self.local_aggr_container) - self.local_aggr_container={} + assert self.local_aggr_container.get("timestamp", -1) == timestamp + self.node_aggr_parent.commit_aggregate( + self.host, + self.local_aggr_container, + ) + self.local_aggr_container = {} + + +class BaseMonitor: + IGNORE_KEYS = [] -class BaseMonitor(object): - IGNORE_KEYS=[] def __init__(self, rproc): self.rproc = rproc self._last = None - def feed(self, container, timestamp): # override to parse pulled data files + def feed(self, container, timestamp): # override to parse pulled data files raise NotImplementedError() - def ssh_client(self, host, shell): # override for opening ssh client + def ssh_client(self, host, shell): # override for opening ssh client raise NotImplementedError() - def ssh_close(self): # override for clear up ssh client + def ssh_close(self): # override for clear up ssh client raise NotImplementedError() def commit(self, timestamp, header, stat): - if self._last is None: self._last = stat + if self._last is None: + self._last = stat else: - stat_delta = dict([(header+'/'+k, stat[k] - self._last[k]) \ - for k in set(self._last.keys()).union(set(stat.keys()))\ - if k in stat and k in self._last and k not in self.IGNORE_KEYS - ]) + stat_delta = { + header + "/" + k: stat[k] - self._last[k] + for k in set(self._last.keys()).union(set(stat.keys())) + if k in stat and k in self._last and k not in self.IGNORE_KEYS + } self._last = stat -# if header.startswith("net"): -# print stat_delta - stat_delta[header+'/total'] = reduce_patched(lambda a,b: a._add(b, 'total'), stat_delta.values()) + # if header.startswith("net"): + # print(stat_delta) + stat_delta[header + "/total"] = reduce_patched( + lambda a, b: a._add(b, "total"), + stat_delta.values(), + ) self.rproc.aggregate(timestamp, stat_delta) -class BashSSHClientMixin(object): +class BashSSHClientMixin: ssh_lock = threading.Lock() 
+ def ssh_client(self, host, shell): - with open(os.devnull, 'rb', 0) as DEVNULL: + with open(os.devnull, "rb", 0) as DEVNULL: with BashSSHClientMixin.ssh_lock: - self.proc = subprocess.Popen(["ssh", host, shell], bufsize=1, - stdin=DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + self.proc = subprocess.Popen( + ["ssh", host, shell], + bufsize=1, + stdin=DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) return self.proc.stdout def ssh_close(self): @@ -281,126 +333,269 @@ def ssh_close(self): self.proc.wait() return self.proc.returncode -_CPU=namedtuple("CPU", ['label', 'user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq']) + +_CPU = namedtuple( + "CPU", + [ + "label", + "user", + "nice", + "system", + "idle", + "iowait", + "irq", + "softirq", + ], +) + + class CPU(_CPU, PatchedNameTuple): def percentage(self): total = sum(self[1:]) - return CPU(self[0], *[x*100.0 / total for x in self[1:]]) if total>0 else self + return ( + CPU( + self[0], + *[x * 100.0 / total for x in self[1:]], + ) + if total > 0 + else self + ) + class CPUMonitor(BaseMonitor): def __init__(self, rproc): - super(CPUMonitor, self).__init__(rproc) - rproc.register(self, """with open("/proc/stat") as f: + super().__init__(rproc) + rproc.register( + self, + """with open("/proc/stat") as f: s2.send("".join([x for x in f.readlines() if x.startswith("cpu")])) -""") +""", + ) def feed(self, container, timestamp): "parse /proc/stat" - self.commit(timestamp, dict([self._parse_stat(line) for line in container])) + self.commit( + timestamp, + dict( + [self._parse_stat(line) for line in container], + ), + ) def _parse_stat(self, line): "parse one line of /proc/stat" assert line.strip(), "BUG! empty line in /proc/stat" fields = line.split() - if fields[0]=='cpu': - fields[0]='total' + if fields[0] == "cpu": + fields[0] = "total" return (fields[0], CPU(fields[0], *[int(x) for x in fields[1:8]])) def commit(self, timestamp, cpu_stat): if self._last is None: self._last = cpu_stat else: - cpu_usage = dict([("cpu/"+k, (cpu_stat[k] - self._last[k]).percentage()) for k in self._last]) + cpu_usage = { + "cpu/" + k: (cpu_stat[k] - self._last[k]).percentage() + for k in self._last + } self._last = cpu_stat self.rproc.aggregate(timestamp, cpu_usage) -_Network=namedtuple("Network", ['label', "recv_bytes", "recv_packets", "recv_errs", "recv_drop", - "send_bytes", "send_packets", "send_errs", "send_drop"]) -class Network(_Network, PatchedNameTuple): pass + +_Network = namedtuple( + "Network", + [ + "label", + "recv_bytes", + "recv_packets", + "recv_errs", + "recv_drop", + "send_bytes", + "send_packets", + "send_errs", + "send_drop", + ], +) + + +class Network(_Network, PatchedNameTuple): + pass + class NetworkMonitor(BaseMonitor): - IGNORE_KEYS=["lo"] + IGNORE_KEYS = ["lo"] + def __init__(self, rproc): - rproc.register(self, """with open("/proc/net/dev") as f: + rproc.register( + self, + """with open("/proc/net/dev") as f: s2.send("".join([x for x in f.readlines()])) -""") - self._filter = re.compile('^\s*(.+):\s*(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+).*$') - super(NetworkMonitor, self).__init__(rproc) +""", + ) + self._filter = re.compile( + r"^\s*(.+):\s*(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+).*$", + ) + super().__init__(rproc) def feed(self, container, timestamp): "parse /proc/net/dev" - self.commit(timestamp, "net", dict(filter(lambda x:x, [self._parse_net_dev(line) for line in container]))) + self.commit( + 
timestamp, + "net", + dict( + filter( + lambda x: x, + [self._parse_net_dev(line) for line in container], + ), + ), + ) def _parse_net_dev(self, line): matched = self._filter.match(line) if matched: - obj = Network(matched.groups()[0], *[int(x) for x in matched.groups()[1:]]) - if not (obj.recv_bytes==0 and obj.send_bytes==0): + obj = Network( + matched.groups()[0], + *[int(x) for x in matched.groups()[1:]], + ) + if not (obj.recv_bytes == 0 and obj.send_bytes == 0): return (obj[0], obj) -_Disk=namedtuple("Disk", ["label", "io_read", "bytes_read", "time_spent_read", "io_write", "bytes_write", "time_spent_write"]) -class Disk(_Disk, PatchedNameTuple): pass +_Disk = namedtuple( + "Disk", + [ + "label", + "io_read", + "bytes_read", + "time_spent_read", + "io_write", + "bytes_write", + "time_spent_write", + ], +) + + +class Disk(_Disk, PatchedNameTuple): + pass + class DiskMonitor(BaseMonitor): def __init__(self, rproc): - super(DiskMonitor, self).__init__(rproc) - rproc.register(self, """with open("/proc/diskstats") as f: + super().__init__(rproc) + rproc.register( + self, + """with open("/proc/diskstats") as f: blocks = os.listdir("/sys/block") s2.send("".join([x for x in f.readlines() if x.split()[2] in blocks and not x.split()[2].startswith("loop") and x.split()[3]!="0"])) -""") +""", + ) def feed(self, container, timestamp): "parse /proc/diskstats" - self.commit(timestamp, "disk", dict([self._parse_disk_stat(line) for line in container])) + self.commit( + timestamp, + "disk", + dict( + [self._parse_disk_stat(line) for line in container], + ), + ) def _parse_disk_stat(self, line): fields = line.split()[2:] - obj = Disk(fields[0], - io_read=int(fields[1]), bytes_read=int(fields[3])*512, time_spent_read=int(fields[4])/1000.0, - io_write=int(fields[5]), bytes_write=int(fields[7])*512, time_spent_write=int(fields[8])/1000.0) + obj = Disk( + fields[0], + io_read=int(fields[1]), + bytes_read=int(fields[3]) * 512, + time_spent_read=int(fields[4]) / 1000.0, + io_write=int(fields[5]), + bytes_write=int(fields[7]) * 512, + time_spent_write=int(fields[8]) / 1000.0, + ) return (obj[0], obj) -_Memory=namedtuple("Memory", ["label", "total", "used", "buffer_cache", "free", "map"]) -class Memory(_Memory, PatchedNameTuple): pass +_Memory = namedtuple( + "Memory", + ["label", "total", "used", "buffer_cache", "free", "map"], +) + + +class Memory(_Memory, PatchedNameTuple): + pass + class MemoryMonitor(BaseMonitor): def __init__(self, rproc): - super(MemoryMonitor, self).__init__(rproc) - rproc.register(self, """with open("/proc/meminfo") as f: + super().__init__(rproc) + rproc.register( + self, + """with open("/proc/meminfo") as f: mem = dict([(a, b.split()[0].strip()) for a, b in [x.split(":") for x in f.readlines()]]) s2.send(":".join([mem[field] for field in ["MemTotal", "Buffers", "Cached", "MemFree", "Mapped"]])+chr(10)) -""") +""", + ) def feed(self, memory_status, timestamp): "parse /proc/meminfo" - total, buffers, cached, free, mapped= [int(x) for x in memory_status[0].split(":")] + total, buffers, cached, free, mapped = ( + int(x) for x in memory_status[0].split(":") + ) + + self.rproc.aggregate( + timestamp, + { + "memory/total": Memory( + label="total", + total=total, + used=total - free - buffers - cached, + buffer_cache=buffers + cached, + free=free, + map=mapped, + ), + }, + ) + + +_Proc = namedtuple( + "Proc", + ["label", "load5", "load10", "load15", "running", "procs"], +) + + +class Proc(_Proc, PatchedNameTuple): + pass - self.rproc.aggregate(timestamp, {"memory/total":Memory(label="total", 
total=total, - used=total - free - buffers-cached, - buffer_cache=buffers + cached, - free=free, map=mapped)}) -_Proc=namedtuple("Proc", ["label", "load5", "load10", "load15", "running", "procs"]) -class Proc(_Proc, PatchedNameTuple): pass class ProcMonitor(BaseMonitor): def __init__(self, rproc): - super(ProcMonitor, self).__init__(rproc) - rproc.register(self, """with open("/proc/loadavg") as f: + super().__init__(rproc) + rproc.register( + self, + """with open("/proc/loadavg") as f: s2.send(f.read()) -""") +""", + ) def feed(self, load_status, timestamp): "parse /proc/meminfo" - load5, load10, load15, running_procs= load_status[0].split()[:4] - running, procs = running_procs.split('/') - - self.rproc.aggregate(timestamp, {"proc":Proc(label="total", load5=float(load5), load10=float(load10), - load15=float(load15), running=int(running), procs=int(procs))}) - - -class NodeAggregator(object): + load5, load10, load15, running_procs = load_status[0].split()[:4] + running, procs = running_procs.split("/") + + self.rproc.aggregate( + timestamp, + { + "proc": Proc( + label="total", + load5=float(load5), + load10=float(load10), + load15=float(load15), + running=int(running), + procs=int(procs), + ), + }, + ) + + +class NodeAggregator: def __init__(self, log_name): self.node_pool = {} self.log_name = log_name @@ -409,14 +604,14 @@ def __init__(self, log_name): os.unlink(self.log_name) except OSError: pass - + def append(self, node): assert isinstance(node, RemoteProc) self.node_pool[node.host] = node node.na_register(self) def commit_aggregate(self, node, datas): - datas['hostname'] = node + datas["hostname"] = node with self.log_lock: with file(self.log_name, "a") as f: f.write(repr(datas) + "\n") @@ -431,6 +626,7 @@ def stop(self): for v in self.node_pool.values(): v.join() + def round_to_base(v, b): """ >>> round_to_base(0.1, 0.3) @@ -446,51 +642,64 @@ def round_to_base(v, b): """ for i in range(10): base = int(b * 10**i) - if abs(base - b * 10**i) < 0.001: break - assert base>0 + if abs(base - b * 10**i) < 0.001: + break + assert base > 0 return float(int(v * 10**i) / base * base) / (10**i) + def filter_dict_with_prefix(d, prefix, sort=True): keys = sorted(d.keys()) if sort else d.keys() - if prefix[0]=='!': - return dict([(x, d[x]) for x in keys if not x.startswith(prefix[1:])]) + if prefix[0] == "!": + return {x: d[x] for x in keys if not x.startswith(prefix[1:])} else: - return dict([(x, d[x]) for x in keys if x.startswith(prefix)]) + return {x: d[x] for x in keys if x.startswith(prefix)} + def reduce_patched(func, data): - if len(data)==1: + if len(data) == 1: return data[0] - elif len(data)==0: + elif len(data) == 0: return data else: return reduce(func, data) + def filter_dict_with_prefixes(d, *prefixes): - if len(prefixes)==1: + if len(prefixes) == 1: return filter_dict_with_prefix(d, prefixes[0]) else: - return reduce_patched(lambda a,b: filter_dict_with_prefix(filter_dict_with_prefix(d, a),b), - prefixes) + return reduce_patched( + lambda a, b: filter_dict_with_prefix( + filter_dict_with_prefix(d, a), + b, + ), + prefixes, + ) + def test(): p = BashSSHClientMixin() - script=r"""exec(' + script = r"""exec(' import time, os, sys while 1: - with open("/proc/stat") as f: print f.read(), - print "---hello---" + with open("/proc/stat") as f: print(f.read()), + print("---hello---") time.sleep(1) ')""" - s = script.replace('"', r'\"').replace("\n", r"\n") - with p.ssh_client("localhost", "python -u -c \"{s}\"".format(s=s)) as f: + s = script.replace('"', r"\"").replace("\n", r"\n") + with 
p.ssh_client("localhost", f'python -u -c "{s}"') as f: while 1: l = f.readline() - print l.rstrip() - if not l: break + print(l.rstrip()) + if not l: + break p.ssh_close() + def test2(): - class P(RemoteProc, BashSSHClientMixin): pass + class P(RemoteProc, BashSSHClientMixin): + pass p = P("localhost", 0.3) CPUMonitor(p) @@ -500,6 +709,7 @@ class P(RemoteProc, BashSSHClientMixin): pass p.run() + def start_monitor(log_filename, nodes): class P(RemoteProc, BashSSHClientMixin): def __init__(self, *args): @@ -509,6 +719,7 @@ def __init__(self, *args): DiskMonitor(self) MemoryMonitor(self) ProcMonitor(self) + global na na = NodeAggregator(log_filename) nodes = sorted(list(set(nodes))) @@ -516,14 +727,27 @@ def __init__(self, *args): na.append(P(node, PROBE_INTERVAL)) na.run() + def parse_bench_log(benchlog_fn): - events=["x,event"] - _spark_stage_submit = re.compile("^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO [a-zA-Z0-9_\.]*DAGScheduler: Submitting (Stage \d+) \((.*)\).+$") # submit spark stage - _spark_stage_finish = re.compile("^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO [a-zA-Z0-9_\.]*DAGScheduler: (Stage \d+) \((.*)\) finished.+$") # spark stage finish - _hadoop_run_job = re.compile("^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapred.*\.Job.*: Running job: job_([\d_]+)$") # hadoop run job - _hadoop_map_reduce_progress = re.compile("^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapred.*\.Job.*:\s+map (\d{1,2})% reduce (\d{1,2})%$") # hadoop reduce progress - _hadoop_job_complete_mr1 = re.compile("^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapred.JobClient: Job complete: job_([\d_]+)$") - _hadoop_job_complete_mr2 = re.compile("^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapreduce.Job: Job job_([\d_]+) completed successfully$") + events = ["x,event"] + _spark_stage_submit = re.compile( + r"^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO [a-zA-Z0-9_\.]*DAGScheduler: Submitting (Stage \d+) \((.*)\).+$", + ) # submit spark stage + _spark_stage_finish = re.compile( + r"^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO [a-zA-Z0-9_\.]*DAGScheduler: (Stage \d+) \((.*)\) finished.+$", + ) # spark stage finish + _hadoop_run_job = re.compile( + r"^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapred.*\.Job.*: Running job: job_([\d_]+)$", + ) # hadoop run job + _hadoop_map_reduce_progress = re.compile( + r"^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapred.*\.Job.*:\s+map (\d{1,2})% reduce (\d{1,2})%$", + ) # hadoop reduce progress + _hadoop_job_complete_mr1 = re.compile( + r"^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapred.JobClient: Job complete: job_([\d_]+)$", + ) + _hadoop_job_complete_mr2 = re.compile( + r"^(\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) INFO mapreduce.Job: Job job_([\d_]+) completed successfully$", + ) """ # MR1 sample @@ -539,78 +763,132 @@ def parse_bench_log(benchlog_fn): ... 
15/04/10 17:20:25 INFO mapreduce.Job: Job job_1427781540447_0448 completed successfully """ - flag={} + flag = {} with open(benchlog_fn) as f: while True: line = f.readline().rstrip() - if not line: break - for rule in [_spark_stage_submit, _spark_stage_finish, _hadoop_run_job, _hadoop_map_reduce_progress, _hadoop_job_complete_mr1, _hadoop_job_complete_mr2]: + if not line: + break + for rule in [ + _spark_stage_submit, + _spark_stage_finish, + _hadoop_run_job, + _hadoop_map_reduce_progress, + _hadoop_job_complete_mr1, + _hadoop_job_complete_mr2, + ]: matched = rule.match(line) if matched: result = matched.groups() - timestamp = datetime.strptime(result[0], r"%y/%m/%d %H:%M:%S").strftime("%s")+"000" # convert to millsec for js + timestamp = ( + datetime.strptime( + result[0], + r"%y/%m/%d %H:%M:%S", + ).strftime("%s") + + "000" + ) # convert to millsec for js if rule is _spark_stage_submit: - events.append("{t},Start {v1} ({v2})".format(t=timestamp, v1=result[1], v2=result[2])) + events.append( + "{t},Start {v1} ({v2})".format( + t=timestamp, + v1=result[1], + v2=result[2], + ), + ) elif rule is _spark_stage_finish: - events.append("{t},Finish {v1} ({v2})".format(t=timestamp, v1=result[1], v2=result[2])) + events.append( + "{t},Finish {v1} ({v2})".format( + t=timestamp, + v1=result[1], + v2=result[2], + ), + ) elif rule is _hadoop_run_job: - events.append("{t},Start Job {v1}".format(t=timestamp, v1=result[1])) - flag={} + events.append( + "{t},Start Job {v1}".format( + t=timestamp, + v1=result[1], + ), + ) + flag = {} elif rule is _hadoop_map_reduce_progress: - map_progress,reduce_progress = int(result[1]), int(result[2]) - op={'map':False, 'reduce':False} + map_progress, reduce_progress = int( + result[1], + ), int(result[2]) + op = {"map": False, "reduce": False} if map_progress == 100: if not "map" in flag: - op['map'] = True - flag['map'] = True - elif reduce_progress>0: - if not 'reduce' in flag: - op['reduce'] = True - flag['reduce'] = True - if op['map'] and op['reduce']: - events.append("{t},Map finish and Reduce start".format(t=timestamp)) - elif op['map']: - events.append("{t},Map finish".format(t=timestamp)) - elif op['reduce']: - events.append("{t},Reduce start".format(t=timestamp)) - elif rule is _hadoop_job_complete_mr1 or rule is _hadoop_job_complete_mr2: - events.append("{t},Finsih Job {v1}".format(t=timestamp, v1=result[1])) + op["map"] = True + flag["map"] = True + elif reduce_progress > 0: + if not "reduce" in flag: + op["reduce"] = True + flag["reduce"] = True + if op["map"] and op["reduce"]: + events.append( + f"{timestamp},Map finish and Reduce start", + ) + elif op["map"]: + events.append(f"{timestamp},Map finish") + elif op["reduce"]: + events.append( + f"{timestamp},Reduce start", + ) + elif ( + rule is _hadoop_job_complete_mr1 + or rule is _hadoop_job_complete_mr2 + ): + events.append( + "{t},Finsih Job {v1}".format( + t=timestamp, + v1=result[1], + ), + ) else: assert 0, "should never reach here" - # limit maximum string length of events for i in range(len(events)): - event_time, event_str = re.split(',', events[i], 1) + event_time, event_str = re.split(",", events[i], 1) if len(event_str) > 45: - event_str = event_str[:21]+ '...' + event_str[-21:] - events[i]="%s,%s" % (event_time, event_str) + event_str = event_str[:21] + "..." 
+ event_str[-21:]
+            events[i] = f"{event_time},{event_str}"

     # merge events occurred at sametime:
     i = 1
-    while i < len(events)-1:
-        cur = events[i].split(',')[0]
-        next = events[i+1].split(',')[0]
-        if abs(int(cur)/1000 - int(next)/1000) < 1:
-            events[i] = events[i] + "
" + re.split(',', events[i+1], 1)[1]
-            del events[i+1]
+    while i < len(events) - 1:
+        cur = events[i].split(",")[0]
+        next = events[i + 1].split(",")[0]
+        # floor to whole seconds (under Python 2 this "/" was integer division)
+        if abs(int(cur) // 1000 - int(next) // 1000) < 1:
+            events[i] = events[i] + "
" + \ + re.split(",", events[i + 1], 1)[1] + del events[i + 1] continue i += 1 return events + def generate_report(workload_title, log_fn, benchlog_fn, report_fn): - c =- 1 + c = -1 with open(log_fn) as f: - datas=[eval(x) for x in f.readlines()] + datas = [eval(x) for x in f.readlines()] - all_hosts = sorted(list(set([x['hostname'] for x in datas]))) - data_slices = groupby(datas, lambda x:round_to_base(x['timestamp'], PROBE_INTERVAL)) # round to time interval and groupby + all_hosts = sorted(list({x["hostname"] for x in datas})) + data_slices = groupby( + datas, + lambda x: round_to_base( + x["timestamp"], + PROBE_INTERVAL, + ), + ) # round to time interval and groupby # Generating CSVs cpu_heatmap = ["x,y,value,hostname,coreid"] cpu_overall = ["x,idle,user,system,iowait,others"] network_heatmap = ["x,y,value,hostname,adapterid"] - network_overall = ["x,recv_bytes,send_bytes,|recv_packets,send_packets,errors"] + network_overall = [ + "x,recv_bytes,send_bytes,|recv_packets,send_packets,errors", + ] diskio_heatmap = ["x,y,value,hostname,diskid"] diskio_overall = ["x,read_bytes,write_bytes,|read_io,write_io"] memory_heatmap = ["x,y,value,hostname"] @@ -619,211 +897,287 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): procload_overall = ["x,load5,load10,load15,|running,procs"] events = parse_bench_log(benchlog_fn) - cpu_count={} - network_count={} - diskio_count={} - memory_count={} - proc_count={} + cpu_count = {} + network_count = {} + diskio_count = {} + memory_count = {} + proc_count = {} for t, sub_data in data_slices: - classed_by_host = dict([(x['hostname'], x) for x in sub_data]) + classed_by_host = {x["hostname"]: x for x in sub_data} # total cpus, plot user/sys/iowait/other data_by_all_hosts = [classed_by_host.get(h, {}) for h in all_hosts] # all cpu cores, total cluster - summed1 = [x['cpu/total'] for x in data_by_all_hosts if x.has_key('cpu/total')] - if summed1: - summed = reduce_patched(lambda a,b: a._add(b), summed1) / len(summed1) + summed1 = [ + x["cpu/total"] + for x in data_by_all_hosts if x.has_key("cpu/total") + ] + if summed1: + summed = reduce_patched( + lambda a, b: a._add(b), + summed1, + ) / len(summed1) for x in data_by_all_hosts: - cpu = x.get('cpu/total', None) - if not cpu: continue + cpu = x.get("cpu/total", None) + if not cpu: + continue # user, system, io, idle, others -# print t, x['hostname'], cpu.user, cpu.system, cpu.iowait, cpu.idle, cpu.nice+cpu.irq+cpu.softirq -# print t, summed - cpu_overall.append("{time},{idle},{user},{system},{iowait},{others}" \ - .format(time = int(t*1000), user = summed.user, system = summed.system, - iowait = summed.iowait, idle = summed.idle, - others = summed.nice + summed.irq + summed.softirq)) + # print(t, x['hostname'], cpu.user, cpu.system, cpu.iowait, cpu.idle, cpu.nice+cpu.irq+cpu.softirq) + # print(t, summed) + cpu_overall.append( + "{time},{idle},{user},{system},{iowait},{others}".format( + time=int(t * 1000), + user=summed.user, + system=summed.system, + iowait=summed.iowait, + idle=summed.idle, + others=summed.nice + summed.irq + summed.softirq, + ), + ) # all cpu cores, plot heatmap according to cpus/time/usage(100%-idle) for idx, x in enumerate(data_by_all_hosts): - for idy, y in enumerate(filter_dict_with_prefixes(x, "cpu", "!cpu/total").values()): + for idy, y in enumerate( + filter_dict_with_prefixes(x, "cpu", "!cpu/total").values(), + ): try: - pos = cpu_count[(idx, idy, x['hostname'])] + pos = cpu_count[(idx, idy, x["hostname"])] except: - pos = len(cpu_count) - cpu_count[(idx, idy, 
x['hostname'])] = pos -# print t, pos, 100-y.idle, x['hostname'], y.label - cpu_heatmap.append("{time},{pos},{value},{host},{cpuid}" \ - .format(time = int(t*1000), pos = pos, value = 100-y.idle, - host = x['hostname'], cpuid = y.label)) + pos = len(cpu_count) + cpu_count[(idx, idy, x["hostname"])] = pos + # print(t, pos, 100-y.idle, x['hostname'], y.label) + cpu_heatmap.append( + "{time},{pos},{value},{host},{cpuid}".format( + time=int(t * 1000), + pos=pos, + value=100 - y.idle, + host=x["hostname"], + cpuid=y.label, + ), + ) # all disk of each node, total cluster - summed1=[x['disk/total'] for x in data_by_all_hosts if x.has_key('disk/total')] + summed1 = [ + x["disk/total"] for x in data_by_all_hosts if x.has_key("disk/total") + ] if summed1: - summed = reduce_patched(lambda a,b: a._add(b), summed1) + summed = reduce_patched(lambda a, b: a._add(b), summed1) for x in data_by_all_hosts: - disk = x.get('disk/total', None) - if not disk: continue + disk = x.get("disk/total", None) + if not disk: + continue # io-read, io-write, bytes-read, bytes-write -# print t, x['hostname'], disk.io_read, disk.io_write, disk.bytes_read, disk.bytes_write - # print t, summed - diskio_overall.append("{time},{bytes_read},{bytes_write},{io_read},{io_write}" \ - .format(time = int(t*1000), - bytes_read = summed.bytes_read / PROBE_INTERVAL, - bytes_write = summed.bytes_write / PROBE_INTERVAL, - io_read = summed.io_read / PROBE_INTERVAL, - io_write = summed.io_write / PROBE_INTERVAL)) - + # print(t, x['hostname'], disk.io_read, disk.io_write, disk.bytes_read, disk.bytes_write) + # print(t, summed) + diskio_overall.append( + "{time},{bytes_read},{bytes_write},{io_read},{io_write}".format( + time=int(t * 1000), + bytes_read=summed.bytes_read / PROBE_INTERVAL, + bytes_write=summed.bytes_write / PROBE_INTERVAL, + io_read=summed.io_read / PROBE_INTERVAL, + io_write=summed.io_write / PROBE_INTERVAL, + ), + ) # all disks, plot heatmap according to disks/bytes_read+bytes_write for idx, x in enumerate(data_by_all_hosts): - for idy, y in enumerate(filter_dict_with_prefixes(x, "disk", "!disk/total").values()): + for idy, y in enumerate( + filter_dict_with_prefixes(x, "disk", "!disk/total").values(), + ): try: - pos = diskio_count[(idx, idy, x['hostname'])] + pos = diskio_count[(idx, idy, x["hostname"])] except: - pos = len(diskio_count) - diskio_count[(idx, idy, x['hostname'])] = pos -# print t, pos, 100-y.idle, x['hostname'], y.label - diskio_heatmap.append("{time},{pos},{value},{host},{diskid}" \ - .format(time = int(t*1000), - pos = pos, - value = (y.bytes_read + y.bytes_write) / PROBE_INTERVAL, - host = x['hostname'], - diskid = y.label)) + pos = len(diskio_count) + diskio_count[(idx, idy, x["hostname"])] = pos + # print(t, pos, 100-y.idle, x['hostname'], y.label) + diskio_heatmap.append( + "{time},{pos},{value},{host},{diskid}".format( + time=int(t * 1000), + pos=pos, + value=(y.bytes_read + y.bytes_write) / PROBE_INTERVAL, + host=x["hostname"], + diskid=y.label, + ), + ) # memory of each node, total cluster - summed1 = [x['memory/total'] for x in data_by_all_hosts if x.has_key('memory/total')] + summed1 = [ + x["memory/total"] for x in data_by_all_hosts if x.has_key("memory/total") + ] if summed1: - summed = reduce_patched(lambda a,b: a._add(b), summed1) + summed = reduce_patched(lambda a, b: a._add(b), summed1) for x in data_by_all_hosts: mem = x.get("memory/total", None) - if not mem: continue + if not mem: + continue # mem-total, mem-used, mem-buffer&cache, mem-free, KB - # print t, x['hostname'], mem.total, 
mem.used, mem.buffer_cache, mem.free - #print t, summed - memory_overall.append("{time},{free},{buffer_cache},{used}" \ - .format(time = int(t*1000), - free = summed.free, - used = summed.used, - buffer_cache = summed.buffer_cache)) + # print(t, x['hostname'], mem.total, mem.used, mem.buffer_cache, mem.free) + # print(t, summed) + memory_overall.append( + "{time},{free},{buffer_cache},{used}".format( + time=int(t * 1000), + free=summed.free, + used=summed.used, + buffer_cache=summed.buffer_cache, + ), + ) # all memory, plot heatmap according to memory/total - free for idx, x in enumerate(data_by_all_hosts): - for idy, y in enumerate(filter_dict_with_prefixes(x, "memory/total").values()): + for idy, y in enumerate( + filter_dict_with_prefixes(x, "memory/total").values(), + ): try: - pos = memory_count[(idx, idy, x['hostname'])] + pos = memory_count[(idx, idy, x["hostname"])] except: - pos = len(memory_count) - memory_count[(idx, idy, x['hostname'])] = pos -# print t, pos, 100-y.idle, x['hostname'], y.label - memory_heatmap.append("{time},{pos},{value},{host}" \ - .format(time = int(t*1000), - pos = pos, - value = (y.total - y.free)*1000, - host = x['hostname'])) - + pos = len(memory_count) + memory_count[(idx, idy, x["hostname"])] = pos + # print(t, pos, 100-y.idle, x['hostname'], y.label) + memory_heatmap.append( + "{time},{pos},{value},{host}".format( + time=int(t * 1000), + pos=pos, + value=(y.total - y.free) * 1000, + host=x["hostname"], + ), + ) # proc of each node, total cluster - summed1 = [x['proc'] for x in data_by_all_hosts if x.has_key('proc')] - if summed1: - summed = reduce_patched(lambda a,b: a._add(b), summed1) + summed1 = [x["proc"] for x in data_by_all_hosts if x.has_key("proc")] + if summed1: + summed = reduce_patched(lambda a, b: a._add(b), summed1) for x in data_by_all_hosts: procs = x.get("proc", None) - if not procs: continue - procload_overall.append("{time},{load5},{load10},{load15},{running},{procs}"\ - .format(time = int(t*1000), - load5 = summed.load5,load10=summed.load10, - load15 = summed.load15,running=summed.running, - procs = summed.procs)) - + if not procs: + continue + procload_overall.append( + "{time},{load5},{load10},{load15},{running},{procs}".format( + time=int(t * 1000), + load5=summed.load5, + load10=summed.load10, + load15=summed.load15, + running=summed.running, + procs=summed.procs, + ), + ) + # all nodes' proc, plot heatmap according to proc/proc.procs for idx, x in enumerate(data_by_all_hosts): for idy, y in enumerate(filter_dict_with_prefixes(x, "proc").values()): try: - pos = proc_count[(idx, idy, x['hostname'])] + pos = proc_count[(idx, idy, x["hostname"])] except: - pos = len(proc_count) - proc_count[(idx, idy, x['hostname'])] = pos -# print t, pos, 100-y.idle, x['hostname'], y.label - procload_heatmap.append("{time},{pos},{value},{host}" \ - .format(time = int(t*1000), pos = pos, value = y.procs, - host = x['hostname'])) + pos = len(proc_count) + proc_count[(idx, idy, x["hostname"])] = pos + # print(t, pos, 100-y.idle, x['hostname'], y.label) + procload_heatmap.append( + "{time},{pos},{value},{host}".format( + time=int(t * 1000), + pos=pos, + value=y.procs, + host=x["hostname"], + ), + ) # all network interface, total cluster - summed1 = [x['net/total'] for x in data_by_all_hosts if x.has_key('net/total')] + summed1 = [ + x["net/total"] + for x in data_by_all_hosts if x.has_key("net/total") + ] - if summed1: - summed = reduce_patched(lambda a,b: a._add(b), summed1) + if summed1: + summed = reduce_patched(lambda a, b: a._add(b), summed1) for 
x in data_by_all_hosts: net = x.get("net/total", None) - if not net: continue + if not net: + continue # recv-byte, send-byte, recv-packet, send-packet, errors - # print t, x['hostname'], net.recv_bytes, net.send_bytes, net.recv_packets, net.send_packets, net.recv_errs+net.send_errs+net.recv_drop+net.send_drop - # print t, summed - network_overall.append("{time},{recv_bytes},{send_bytes},{recv_packets},{send_packets},{errors}" \ - .format(time = int(t*1000), - recv_bytes = summed.recv_bytes / PROBE_INTERVAL, - send_bytes = summed.send_bytes / PROBE_INTERVAL, - recv_packets = summed.recv_packets / PROBE_INTERVAL, - send_packets = summed.send_packets / PROBE_INTERVAL, - errors = (summed.recv_errs + summed.send_errs + \ - summed.recv_drop + summed.send_drop) / PROBE_INTERVAL) - ) + # print(t, x['hostname'], net.recv_bytes, net.send_bytes, net.recv_packets, net.send_packets, net.recv_errs+net.send_errs+net.recv_drop+net.send_drop) + # print(t, summed) + network_overall.append( + "{time},{recv_bytes},{send_bytes},{recv_packets},{send_packets},{errors}".format( + time=int(t * 1000), + recv_bytes=summed.recv_bytes / PROBE_INTERVAL, + send_bytes=summed.send_bytes / PROBE_INTERVAL, + recv_packets=summed.recv_packets / PROBE_INTERVAL, + send_packets=summed.send_packets / PROBE_INTERVAL, + errors=( + summed.recv_errs + + summed.send_errs + + summed.recv_drop + + summed.send_drop + ) + / PROBE_INTERVAL, + ), + ) # all network adapters, plot heatmap according to net/recv_bytes + send_bytes for idx, x in enumerate(data_by_all_hosts): - for idy, y in enumerate(filter_dict_with_prefixes(x, "net", "!net/total").values()): + for idy, y in enumerate( + filter_dict_with_prefixes(x, "net", "!net/total").values(), + ): try: - pos = network_count[(idx, idy, x['hostname'])] + pos = network_count[(idx, idy, x["hostname"])] except: - pos = len(network_count) - network_count[(idx, idy, x['hostname'])] = pos - network_heatmap.append("{time},{pos},{value},{host},{networkid}" \ - .format(time = int(t*1000), - pos = pos*2, - value = y.recv_bytes / PROBE_INTERVAL, - host = x['hostname'], - networkid = y.label+".recv")) - network_heatmap.append("{time},{pos},{value},{host},{networkid}" \ - .format(time = int(t*1000), - pos = pos*2+1, - value = y.send_bytes / PROBE_INTERVAL, - host = x['hostname'], - networkid = y.label+".send")) - + pos = len(network_count) + network_count[(idx, idy, x["hostname"])] = pos + network_heatmap.append( + "{time},{pos},{value},{host},{networkid}".format( + time=int(t * 1000), + pos=pos * 2, + value=y.recv_bytes / PROBE_INTERVAL, + host=x["hostname"], + networkid=y.label + ".recv", + ), + ) + network_heatmap.append( + "{time},{pos},{value},{host},{networkid}".format( + time=int(t * 1000), + pos=pos * 2 + 1, + value=y.send_bytes / PROBE_INTERVAL, + host=x["hostname"], + networkid=y.label + ".send", + ), + ) + with open(samedir("chart-template.html")) as f: template = f.read() - + variables = locals() + def my_replace(match): match = match.group()[1:-1] - if match.endswith('heatmap') or match.endswith('overall'): + if match.endswith("heatmap") or match.endswith("overall"): return "\n".join(variables[match]) - elif match =='events': + elif match == "events": return "\n".join(events) - elif match == 'probe_interval': + elif match == "probe_interval": return str(PROBE_INTERVAL * 1000) - elif match == 'workload_name': + elif match == "workload_name": return workload_title else: - return '{%s}' % match - - with open(report_fn, 'w') as f: - f.write(re.sub(r'{\w+}', my_replace, template)) + return "{%s}" 
% match + + with open(report_fn, "w") as f: + f.write(re.sub(r"{\w+}", my_replace, template)) + def show_usage(): - log("""Usage: + log( + """Usage: monitor.py ... -""") +""", + ) + -if __name__=="__main__": - if len(sys.argv)<6: +if __name__ == "__main__": + if len(sys.argv) < 6: log(sys.argv) show_usage() sys.exit(1) -# log(sys.argv) + # log(sys.argv) global log_path global report_path global workload_title @@ -836,17 +1190,17 @@ def show_usage(): bench_log_path = sys.argv[4] report_path = sys.argv[5] nodes_to_monitor = sys.argv[6:] - pid=os.fork() - if pid: #parent - print pid - else: #child + pid = os.fork() + if pid: # parent + print(pid) + else: # child os.close(0) os.close(1) os.close(2) -# log("child process start") + # log("child process start") signal.signal(signal.SIGTERM, sig_term_handler) start_monitor(log_path, nodes_to_monitor) - while os.path.exists("/proc/%s" % parent_pid): + while os.path.exists("/proc/%s" % parent_pid): sleep(1) # parent lost, stop! signal.signal(signal.SIGTERM, signal.SIG_IGN) diff --git a/bin/functions/monitor_replot.py b/bin/functions/monitor_replot.py index ed9fc979c..bd0fca3c6 100755 --- a/bin/functions/monitor_replot.py +++ b/bin/functions/monitor_replot.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -13,14 +13,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations -from monitor import generate_report import sys -if len(sys.argv)<4: - print """Usage: +from monitor import generate_report + +if len(sys.argv) < 4: + print( + """Usage: monitor_replot.py -""" +""", + ) sys.exit(1) generate_report(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/bin/functions/terminalsize.py b/bin/functions/terminalsize.py index 00a8199c3..0871612d1 100644 --- a/bin/functions/terminalsize.py +++ b/bin/functions/terminalsize.py @@ -1,40 +1,42 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 """ from https://gist.github.com/jtriley/1108174 """ +from __future__ import annotations import os +import platform import shlex import struct -import platform import subprocess - - + + def get_terminal_size(): - """ getTerminalSize() - - get width and height of console - - works on linux,os x,windows,cygwin(windows) - originally retrieved from: - http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python + """getTerminalSize() + - get width and height of console + - works on linux,os x,windows,cygwin(windows) + originally retrieved from: + http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python """ current_os = platform.system() tuple_xy = None - if current_os == 'Windows': + if current_os == "Windows": tuple_xy = _get_terminal_size_windows() if tuple_xy is None: tuple_xy = _get_terminal_size_tput() # needed for window's python in cygwin's xterm! 
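    # (Editor's sketch, not part of the original module) the POSIX branch below
    # ultimately relies on the TIOCGWINSZ ioctl; reduced to one line it reads:
    #     import fcntl, struct, termios
    #     rows, cols = struct.unpack("hh", fcntl.ioctl(1, termios.TIOCGWINSZ, b"1234"))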
- if current_os in ['Linux', 'Darwin'] or current_os.startswith('CYGWIN'): + if current_os in ["Linux", "Darwin"] or current_os.startswith("CYGWIN"): tuple_xy = _get_terminal_size_linux() if tuple_xy is None: - print "default" - tuple_xy = (80, 25) # default value + print("default") + tuple_xy = (80, 25) # default value return tuple_xy - - + + def _get_terminal_size_windows(): try: - from ctypes import windll, create_string_buffer + from ctypes import create_string_buffer, windll + # stdin handle is -10 # stdout handle is -11 # stderr handle is -12 @@ -42,37 +44,51 @@ def _get_terminal_size_windows(): csbi = create_string_buffer(22) res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi) if res: - (bufx, bufy, curx, cury, wattr, - left, top, right, bottom, - maxx, maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw) + ( + bufx, + bufy, + curx, + cury, + wattr, + left, + top, + right, + bottom, + maxx, + maxy, + ) = struct.unpack("hhhhHhhhhhh", csbi.raw) sizex = right - left + 1 sizey = bottom - top + 1 return sizex, sizey except: pass - - + + def _get_terminal_size_tput(): # get terminal width # src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width-height-of-a-terminal-window try: - cols = int(subprocess.check_call(shlex.split('tput cols'))) - rows = int(subprocess.check_call(shlex.split('tput lines'))) + cols = int(subprocess.check_call(shlex.split("tput cols"))) + rows = int(subprocess.check_call(shlex.split("tput lines"))) return (cols, rows) except: pass - - + + def _get_terminal_size_linux(): def ioctl_GWINSZ(fd): try: import fcntl import termios - cr = struct.unpack('hh', - fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) + + cr = struct.unpack( + "hh", + fcntl.ioctl(fd, termios.TIOCGWINSZ, "1234"), + ) return cr except: pass + cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) if not cr: try: @@ -83,11 +99,12 @@ def ioctl_GWINSZ(fd): pass if not cr: try: - cr = (os.environ['LINES'], os.environ['COLUMNS']) + cr = (os.environ["LINES"], os.environ["COLUMNS"]) except: return None return int(cr[1]), int(cr[0]) - + + if __name__ == "__main__": sizex, sizey = get_terminal_size() - print 'width =', sizex, 'height =', sizey + print("width =", sizex, "height =", sizey) diff --git a/bin/functions/test_load_config.py b/bin/functions/test_load_config.py index 000760b43..59ac97e66 100644 --- a/bin/functions/test_load_config.py +++ b/bin/functions/test_load_config.py @@ -1,10 +1,13 @@ -import unittest -import os -import load_config -import mock +from __future__ import annotations + import fnmatch -import re import glob +import os +import re +import unittest +from unittest import mock + +import load_config def print_hint_seperator(hint): @@ -27,10 +30,10 @@ def parse_conf(): line = line.strip() if not line: continue # skip empty lines - if line[0] == '#': + if line[0] == "#": continue # skip comments try: - key, value = re.split("\s", line, 1) + key, value = re.split(r"\s", line, 1) except ValueError: key = line.strip() value = "" @@ -51,10 +54,10 @@ def get_expected(name): line = line.strip() if not line: continue # skip empty lines - if line[0] == '#': + if line[0] == "#": continue # skip comments try: - key, value = re.split("\s", line, 1) + key, value = re.split(r"\s", line, 1) except ValueError: key = line.strip() value = "" @@ -64,10 +67,8 @@ def get_expected(name): def test_probe_hadoop_examples_jars(): - def test_probe_hadoop_examples_jars_generator(case_num): def test(self): - def exactly_one_file_one_candidate(filename_pattern): regex = fnmatch.translate(filename_pattern) 
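                # (Editor's note) fnmatch.translate() turns a shell-style pattern
                # (e.g. something like "hadoop-mapreduce-examples-*.jar") into a
                # regex string; the compiled regex below is matched against the
                # canned jar path from the case table instead of a real glob.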
reobj = re.compile(regex) @@ -77,27 +78,37 @@ def exactly_one_file_one_candidate(filename_pattern): return "" mock_exactly_one_file_one_candidate = mock.Mock( - side_effect=exactly_one_file_one_candidate) - with mock.patch("load_config.exactly_one_file_one_candidate", - mock_exactly_one_file_one_candidate): + side_effect=exactly_one_file_one_candidate, + ) + with mock.patch( + "load_config.exactly_one_file_one_candidate", + mock_exactly_one_file_one_candidate, + ): try: from load_config import probe_hadoop_examples_jars + probe_hadoop_examples_jars() except: pass answer = load_config.HibenchConf["hibench.hadoop.examples.jar"] self.assertEqual( - os.path.abspath(answer), os.path.abspath( - hadoop_examples_jars_list[case_num][1])) + os.path.abspath(answer), + os.path.abspath( + hadoop_examples_jars_list[case_num][1], + ), + ) return test - hadoop_examples_jars_list = [["apache0", - "/tmp/test/hadoop_home/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar"] - ] + hadoop_examples_jars_list = [ + [ + "apache0", + "/tmp/test/hadoop_home/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar", + ], + ] for i in range(len(hadoop_examples_jars_list)): - test_name = 'test_%s' % hadoop_examples_jars_list[i][0] + test_name = "test_%s" % hadoop_examples_jars_list[i][0] test = test_probe_hadoop_examples_jars_generator(i) setattr(ProbeHadoopExamplesTestCase, test_name, test) @@ -108,7 +119,6 @@ def exactly_one_file_one_candidate(filename_pattern): def test_probe_hadoop_test_examples_jars(): def test_probe_hadoop_examples_jars_generator(case_num): def test(self): - def exactly_one_file_one_candidate(filename_pattern): regex = fnmatch.translate(filename_pattern) reobj = re.compile(regex) @@ -118,28 +128,37 @@ def exactly_one_file_one_candidate(filename_pattern): return "" mock_exactly_one_file_one_candidate = mock.Mock( - side_effect=exactly_one_file_one_candidate) - with mock.patch("load_config.exactly_one_file_one_candidate", - mock_exactly_one_file_one_candidate): + side_effect=exactly_one_file_one_candidate, + ) + with mock.patch( + "load_config.exactly_one_file_one_candidate", + mock_exactly_one_file_one_candidate, + ): try: from load_config import probe_hadoop_examples_test_jars + probe_hadoop_examples_test_jars() except: pass - answer = load_config.HibenchConf[ - "hibench.hadoop.examples.test.jar"] + answer = load_config.HibenchConf["hibench.hadoop.examples.test.jar"] self.assertEqual( - os.path.abspath(answer), os.path.abspath( - hadoop_test_examples_jars_list[case_num][1])) + os.path.abspath(answer), + os.path.abspath( + hadoop_test_examples_jars_list[case_num][1], + ), + ) return test - hadoop_test_examples_jars_list = [["apache0", - "/tmp/test/hadoop_home/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.7.3-tests.jar"] - ] + hadoop_test_examples_jars_list = [ + [ + "apache0", + "/tmp/test/hadoop_home/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.7.3-tests.jar", + ], + ] for i in range(len(hadoop_test_examples_jars_list)): - test_name = 'test_%s' % hadoop_test_examples_jars_list[i][0] + test_name = "test_%s" % hadoop_test_examples_jars_list[i][0] test = test_probe_hadoop_examples_jars_generator(i) setattr(ProbeHadoopTestExamplesTestCase, test_name, test) @@ -170,8 +189,7 @@ def test_probe_hadoop_conf_dir(): def test_probe_spark_conf_value(): def test_probe_spark_conf_value_generator(case_num): def test(self): - load_config.HibenchConf[ - "hibench.spark.home"] = "/tmp/test/spark_home" + load_config.HibenchConf["hibench.spark.home"] = "/tmp/test/spark_home" 
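            # (Editor's note) These generated tests share one pattern: mock.patch()
            # temporarily swaps a load_config helper for a mock.Mock(side_effect=...)
            # fake, the probe_* function under test is called, and the result is
            # compared with the expected value for that case.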
conf_name = spark_conf_test_case_list[case_num][1] line = spark_conf_test_case_list[case_num][2] @@ -184,40 +202,50 @@ def read_file_content(filepath): return [] mock_read_file_content = mock.Mock(side_effect=read_file_content) - with mock.patch("load_config.read_file_content", - mock_read_file_content): + with mock.patch( + "load_config.read_file_content", + mock_read_file_content, + ): answer = "" try: from load_config import probe_spark_conf_value + answer = probe_spark_conf_value(conf_name, default) except: pass expected = default if len(line.split("=")) >= 2: expected = line.split("=")[1] - expected = expected.strip("\'") - expected = expected.strip("\"") + expected = expected.strip("'") + expected = expected.strip('"') self.assertEqual(str(answer), expected) return test - spark_conf_test_case_list = [["spark_master_webui_port_simple", - "SPARK_MASTER_WEBUI_PORT", - "export SPARK_MASTER_WEBUI_PORT=8880", - "8080"], - ["spark_master_webui_port_single_quotes", - "SPARK_MASTER_WEBUI_PORT", - "export SPARK_MASTER_WEBUI_PORT=\'8880\'", - "8080"], - ["spark_master_webui_port_double_quotes", - "SPARK_MASTER_WEBUI_PORT", - "export SPARK_MASTER_WEBUI_PORT=\"8880\"", - "8080"], - ] + spark_conf_test_case_list = [ + [ + "spark_master_webui_port_simple", + "SPARK_MASTER_WEBUI_PORT", + "export SPARK_MASTER_WEBUI_PORT=8880", + "8080", + ], + [ + "spark_master_webui_port_single_quotes", + "SPARK_MASTER_WEBUI_PORT", + "export SPARK_MASTER_WEBUI_PORT='8880'", + "8080", + ], + [ + "spark_master_webui_port_double_quotes", + "SPARK_MASTER_WEBUI_PORT", + 'export SPARK_MASTER_WEBUI_PORT="8880"', + "8080", + ], + ] for i in range(len(spark_conf_test_case_list)): - test_name = 'test_%s' % spark_conf_test_case_list[i][0] + test_name = "test_%s" % spark_conf_test_case_list[i][0] test = test_probe_spark_conf_value_generator(i) setattr(ProbeSparkConfValueTestCase, test_name, test) print_hint_seperator("Test probe spark conf value") @@ -235,27 +263,22 @@ def test_probe_masters_slaves_hostnames(): class ProbeHadoopExamplesTestCase(unittest.TestCase): - def setUp(self): - load_config.HibenchConf[ - "hibench.hadoop.home"] = "/tmp/test/hadoop_home" + load_config.HibenchConf["hibench.hadoop.home"] = "/tmp/test/hadoop_home" def tearDown(self): load_config.HibenchConf["hibench.hadoop.examples.jar"] = "" class ProbeHadoopTestExamplesTestCase(unittest.TestCase): - def setUp(self): - load_config.HibenchConf[ - "hibench.hadoop.home"] = "/tmp/test/hadoop_home" + load_config.HibenchConf["hibench.hadoop.home"] = "/tmp/test/hadoop_home" def tearDown(self): load_config.HibenchConf["hibench.hadoop.examples.test.jar"] = "" class ProbeJavaBinTestCase(unittest.TestCase): - def setUp(self): pass @@ -270,7 +293,6 @@ def test_probe_java_bin(self): class ProbeHadoopReleaseTestCase(unittest.TestCase): - def setUp(self): pass @@ -286,7 +308,6 @@ def test_probe_hadoop_release(self): class ProbeSparkVersionTestCase(unittest.TestCase): - def setUp(self): pass @@ -302,14 +323,15 @@ def test_probe_spark_version(self): class ProbeHadoopConfDirTestCase(unittest.TestCase): - def expected_hadoop_conf_dir(self): if not load_config.HibenchConf.get("hibench.hadoop.configure.dir", ""): hadoop_conf_dir = os.path.join( - load_config.HibenchConf["hibench.hadoop.home"], "etc", "hadoop") + load_config.HibenchConf["hibench.hadoop.home"], + "etc", + "hadoop", + ) else: - hadoop_conf_dir = load_config.HibenchConf[ - "hibench.hadoop.configure.dir"] + hadoop_conf_dir = load_config.HibenchConf["hibench.hadoop.configure.dir"] return hadoop_conf_dir def 
setUp(self): @@ -327,7 +349,6 @@ def test_probe_hadoop_conf_dir(self): class ProbeSparkConfValueTestCase(unittest.TestCase): - def setUp(self): pass @@ -344,20 +365,21 @@ def test_probe_spark_conf_value_default_value(self): class ProbeJavaOptsTestCase(unittest.TestCase): - def setUp(self): + load_config.HibenchConf["hibench.hadoop.home"] = "/tmp/test/hadoop_home" load_config.HibenchConf[ - "hibench.hadoop.home"] = "/tmp/test/hadoop_home" - load_config.HibenchConf[ - "hibench.hadoop.configure.dir"] = "/tmp/test/hadoop_home/etc/hadoop" + "hibench.hadoop.configure.dir" + ] = "/tmp/test/hadoop_home/etc/hadoop" def tearDown(self): pass def test_probe_java_opts(self): - mapred_site_path = load_config.HibenchConf[ - "hibench.hadoop.configure.dir"] + "/mapred-site.xml" + mapred_site_path = ( + load_config.HibenchConf["hibench.hadoop.configure.dir"] + + "/mapred-site.xml" + ) mapred_site_content = "mapreduce.map.java.opts-Xmx1536M -DpreferIPv4Stack=truemapreduce.reduce.java.opts-Xmx1536M -DpreferIPv4Stack=true" expected_map_java_opts = "-Xmx1536M -DpreferIPv4Stack=true" @@ -370,36 +392,41 @@ def read_file_content_java_opts(filepath): return [] mock_read_file_content_java_opts = mock.Mock( - side_effect=read_file_content_java_opts) - with mock.patch("load_config.read_file_content", - mock_read_file_content_java_opts): + side_effect=read_file_content_java_opts, + ) + with mock.patch( + "load_config.read_file_content", + mock_read_file_content_java_opts, + ): answer = "" try: from load_config import probe_java_opts + probe_java_opts() answer = "" except: pass answer_map_java_opts = load_config.HibenchConf[ - 'hibench.dfsioe.map.java_opts'] + "hibench.dfsioe.map.java_opts" + ] answer_red_java_opts = load_config.HibenchConf[ - 'hibench.dfsioe.red.java_opts'] - if answer_map_java_opts.startswith("\'"): - expected_map_java_opts = "\'" + expected_map_java_opts + "\'" - elif answer_map_java_opts.startswith("\""): - expected_map_java_opts = "\"" + expected_map_java_opts + "\"" + "hibench.dfsioe.red.java_opts" + ] + if answer_map_java_opts.startswith("'"): + expected_map_java_opts = "'" + expected_map_java_opts + "'" + elif answer_map_java_opts.startswith('"'): + expected_map_java_opts = '"' + expected_map_java_opts + '"' self.assertEqual(answer_map_java_opts, expected_map_java_opts) - if answer_red_java_opts.startswith("\'"): - expected_reduce_java_opts = "\'" + expected_reduce_java_opts + "\'" - elif answer_red_java_opts.startswith("\""): - expected_reduce_java_opts = "\"" + expected_reduce_java_opts + "\"" + if answer_red_java_opts.startswith("'"): + expected_reduce_java_opts = "'" + expected_reduce_java_opts + "'" + elif answer_red_java_opts.startswith('"'): + expected_reduce_java_opts = '"' + expected_reduce_java_opts + '"' self.assertEqual(answer_red_java_opts, expected_reduce_java_opts) class ProbeMastersSlavesHostnamesTestCase(unittest.TestCase): - def setUp(self): pass @@ -408,20 +435,21 @@ def tearDown(self): def test_probe_masters_slaves_hostnames(self): load_config.probe_masters_slaves_hostnames() - answer_masters_hostnames = load_config.HibenchConf[ - 'hibench.masters.hostnames'] - answer_slaves_hostnames = load_config.HibenchConf[ - 'hibench.slaves.hostnames'] + answer_masters_hostnames = load_config.HibenchConf["hibench.masters.hostnames"] + answer_slaves_hostnames = load_config.HibenchConf["hibench.slaves.hostnames"] expected_masters_hostnames = get_expected("hibench.masters.hostnames") expected_slaves_hostnames = get_expected("hibench.slaves.hostnames") self.assertEqual( - 
answer_masters_hostnames.strip("\'"), - expected_masters_hostnames) + answer_masters_hostnames.strip("'"), + expected_masters_hostnames, + ) self.assertEqual( - answer_slaves_hostnames.strip("\'"), - expected_slaves_hostnames) + answer_slaves_hostnames.strip("'"), + expected_slaves_hostnames, + ) + -if __name__ == '__main__': +if __name__ == "__main__": test_probe_hadoop_examples_jars() test_probe_hadoop_test_examples_jars() test_probe_java_bin() diff --git a/bin/report_gen_plot.py b/bin/report_gen_plot.py index 1219b50c3..b89751aa6 100755 --- a/bin/report_gen_plot.py +++ b/bin/report_gen_plot.py @@ -1,123 +1,218 @@ -#!/usr/bin/env python -#coding: utf-8 - -import sys, os, re +#!/usr/bin/env python3 +from __future__ import annotations + +import os +import re +import sys +from collections import defaultdict +from collections import namedtuple from pprint import pprint -from collections import defaultdict, namedtuple + import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import matplotlib.colors as colors import matplotlib.cm as cmx +import matplotlib.colors as colors +import matplotlib.pyplot as plt import numpy as np -RecordRaw=namedtuple("RecordRaw", "type durtation data_size throughput_total throughput_per_node") -Record=namedtuple("Record", "type language durtation data_size throughput_total throughput_per_node") +matplotlib.use("Agg") + +RecordRaw = namedtuple( + "RecordRaw", + "type durtation data_size throughput_total throughput_per_node", +) +Record = namedtuple( + "Record", + "type language durtation data_size throughput_total throughput_per_node", +) + def human_readable_size(n): "convert number into human readable string" - if n<1000: return str(n) - if n<800000: return "%.3fK" % (n/1000.0) - if n<800000000: return "%.3fM" % (n/1000000.0) - if n<800000000000: return "%.3fG" % (n/1000000000.0) - return "%.3fT" % (n/1000000000000.0) + if n < 1000: + return str(n) + if n < 800000: + return "%.3fK" % (n / 1000.0) + if n < 800000000: + return "%.3fM" % (n / 1000000.0) + if n < 800000000000: + return "%.3fG" % (n / 1000000000.0) + return "%.3fT" % (n / 1000000000000.0) + def group_by_type(datas): groups = defaultdict(dict) for i in datas: - words = re.sub(r'((?<=[a-z])[A-Z]|(? 
Date: Sun, 24 Jul 2022 22:21:43 +0530 Subject: [PATCH 02/22] Upgraded to python3.x version support --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 740dcf014..c55cc6d65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: # - id: isort # args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black From 1e597a00f1cd372635f2112d0de74448932a7cfe Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 22:22:01 +0530 Subject: [PATCH 03/22] Upgraded to python3.x version support --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c55cc6d65..32c9e1848 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,13 +45,13 @@ repos: # hooks: # - id: mypy -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort - - repo: https://github.com/psf/black rev: 22.3.0 hooks: - id: black + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From 53e5a81e787123dc233b566298ebd4d87025636e Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 22:34:39 +0530 Subject: [PATCH 04/22] removed the import --- .pre-commit-config.yaml | 78 ++++++++++++++++++------------------ bin/functions/load_config.py | 12 +----- 2 files changed, 41 insertions(+), 49 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32c9e1848..8d7c85513 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,44 +14,44 @@ repos: rev: v3.8.2 hooks: - id: reorder-python-imports - args: [--py38-plus, --add-import, 'from __future__ import annotations'] - -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - -- repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -- repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - args: [--py36-plus] - -- repo: https://github.com/asottile/pyupgrade - rev: v2.37.2 - hooks: - - id: pyupgrade args: [--py38-plus] - -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: v0.961 -# hooks: -# - id: mypy - -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +# - repo: https://github.com/pre-commit/mirrors-autopep8 +# rev: v1.6.0 +# hooks: +# - id: autopep8 + +# - repo: https://github.com/PyCQA/flake8 +# rev: 4.0.1 +# hooks: +# - id: flake8 +# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +# - repo: https://github.com/asottile/add-trailing-comma +# rev: v2.2.3 +# hooks: +# - id: add-trailing-comma +# args: [--py36-plus] + +# - repo: https://github.com/asottile/pyupgrade +# rev: v2.37.2 +# hooks: +# - id: 
pyupgrade +# args: [--py38-plus] + + +# #- repo: https://github.com/pre-commit/mirrors-mypy +# # rev: v0.961 +# # hooks: +# # - id: mypy + +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black + +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort diff --git a/bin/functions/load_config.py b/bin/functions/load_config.py index 99bf23134..d9e9bc047 100755 --- a/bin/functions/load_config.py +++ b/bin/functions/load_config.py @@ -13,8 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import annotations - import fcntl import glob import os @@ -353,10 +351,7 @@ def wildcard_replacement(key, value): HibenchConf[key] = value finish = False - wildcard_rules = [ - (key, HibenchConf[key]) - for key in HibenchConf if "*" in key - ] + wildcard_rules = [(key, HibenchConf[key]) for key in HibenchConf if "*" in key] # now, let's check wildcard replacement rules for key, value in wildcard_rules: # check if we found a rule like: aaa.*.ccc.*.ddd -> bbb.*.* @@ -554,10 +549,7 @@ def probe_masters_slaves_by_Yarn(): ) cmd = "( " + yarn_executable + " node -list 2> /dev/null | grep RUNNING )" try: - worker_hostnames = [ - line.split(":")[0] - for line in shell(cmd).split("\n") - ] + worker_hostnames = [line.split(":")[0] for line in shell(cmd).split("\n")] HibenchConf["hibench.slaves.hostnames"] = " ".join(worker_hostnames) HibenchConfRef["hibench.slaves.hostnames"] = ( "Probed by parsing results from: " + cmd From 90897cad80d629845bf07c1a7cbb0a61045b7c78 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 22:34:55 +0530 Subject: [PATCH 05/22] removed the import --- .pre-commit-config.yaml | 78 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d7c85513..d1b6fe921 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -# - repo: https://github.com/pre-commit/mirrors-autopep8 -# rev: v1.6.0 -# hooks: -# - id: autopep8 - -# - repo: https://github.com/PyCQA/flake8 -# rev: 4.0.1 -# hooks: -# - id: flake8 -# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -# - repo: https://github.com/asottile/add-trailing-comma -# rev: v2.2.3 -# hooks: -# - id: add-trailing-comma -# args: [--py36-plus] - -# - repo: https://github.com/asottile/pyupgrade -# rev: v2.37.2 -# hooks: -# - id: pyupgrade -# args: [--py38-plus] - - -# #- repo: https://github.com/pre-commit/mirrors-mypy -# # rev: v0.961 -# # hooks: -# # - id: mypy - -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black - -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.6.0 + hooks: + - id: autopep8 + +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.2.3 + hooks: + - id: add-trailing-comma + args: [--py36-plus] + +- 
repo: https://github.com/asottile/pyupgrade + rev: v2.37.2 + hooks: + - id: pyupgrade + args: [--py38-plus] + + +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v0.961 +# hooks: +# - id: mypy + +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From 5b0cdf08004ef7e766a00d2324d094311b2b2295 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 22:52:51 +0530 Subject: [PATCH 06/22] change the dict syntax --- bin/functions/hibench_prop_env_mapping.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py index ecca09fdd..19a598b93 100644 --- a/bin/functions/hibench_prop_env_mapping.py +++ b/bin/functions/hibench_prop_env_mapping.py @@ -16,7 +16,6 @@ """ Mapping from properties to environment variable names """ -from __future__ import annotations HiBenchEnvPropMappingMandatory = dict( JAVA_BIN="java.bin", @@ -241,7 +240,7 @@ STREAMBENCH_FLINK_PARALLELISM="hibench.streambench.flink.parallelism", ) -HiBenchPropEnvMapping = {v: k for k, v in HiBenchEnvPropMapping.items()} +HiBenchPropEnvMapping = {v: k for k, v in list(HiBenchEnvPropMapping.items())} HiBenchPropEnvMappingMandatory = { - v: k for k, v in HiBenchEnvPropMappingMandatory.items() + v: k for k, v in list(HiBenchEnvPropMappingMandatory.items()) } From 9637d279de6fd3b738a36b07c3c301bd95af513a Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:15:27 +0530 Subject: [PATCH 07/22] change the dict syntax --- .pre-commit-config.yaml | 78 +++++++++++------------ bin/functions/hibench_prop_env_mapping.py | 6 +- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d1b6fe921..8d7c85513 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - -- repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -- repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - args: [--py36-plus] - -- repo: https://github.com/asottile/pyupgrade - rev: v2.37.2 - hooks: - - id: pyupgrade - args: [--py38-plus] - - -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: v0.961 -# hooks: -# - id: mypy - -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +# - repo: https://github.com/pre-commit/mirrors-autopep8 +# rev: v1.6.0 +# hooks: +# - id: autopep8 + +# - repo: https://github.com/PyCQA/flake8 +# rev: 4.0.1 +# hooks: +# - id: flake8 +# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +# - repo: https://github.com/asottile/add-trailing-comma +# rev: v2.2.3 +# hooks: +# - id: add-trailing-comma +# args: [--py36-plus] + +# - repo: https://github.com/asottile/pyupgrade +# rev: v2.37.2 +# hooks: +# - id: pyupgrade +# args: [--py38-plus] + + +# #- repo: 
https://github.com/pre-commit/mirrors-mypy +# # rev: v0.961 +# # hooks: +# # - id: mypy + +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black + +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort diff --git a/bin/functions/hibench_prop_env_mapping.py b/bin/functions/hibench_prop_env_mapping.py index 19a598b93..457b3306e 100644 --- a/bin/functions/hibench_prop_env_mapping.py +++ b/bin/functions/hibench_prop_env_mapping.py @@ -240,7 +240,5 @@ STREAMBENCH_FLINK_PARALLELISM="hibench.streambench.flink.parallelism", ) -HiBenchPropEnvMapping = {v: k for k, v in list(HiBenchEnvPropMapping.items())} -HiBenchPropEnvMappingMandatory = { - v: k for k, v in list(HiBenchEnvPropMappingMandatory.items()) -} +HiBenchPropEnvMapping = dict([(v,k) for k, v in HiBenchEnvPropMapping.items()]) +HiBenchPropEnvMappingMandatory = dict([(v,k) for k, v in HiBenchEnvPropMappingMandatory.items()]) From d588a93cc28cf4da421910626d8e42c5eae541f8 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:15:52 +0530 Subject: [PATCH 08/22] change the dict syntax --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d7c85513..b57cee5a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,10 +16,10 @@ repos: - id: reorder-python-imports args: [--py38-plus] -# - repo: https://github.com/pre-commit/mirrors-autopep8 -# rev: v1.6.0 -# hooks: -# - id: autopep8 +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.6.0 + hooks: + - id: autopep8 # - repo: https://github.com/PyCQA/flake8 # rev: 4.0.1 From 5c158cbb734cfb7d3b802f83f83fb3dd0c661ace Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:16:07 +0530 Subject: [PATCH 09/22] change the dict syntax --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b57cee5a3..2f65464c4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,11 +21,11 @@ repos: hooks: - id: autopep8 -# - repo: https://github.com/PyCQA/flake8 -# rev: 4.0.1 -# hooks: -# - id: flake8 -# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] # - repo: https://github.com/asottile/add-trailing-comma # rev: v2.2.3 From 19a1dc08dbfa217681e2f83901146f019cf16b00 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:16:38 +0530 Subject: [PATCH 10/22] uncommented the pre-commit yaml --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2f65464c4..ed13fbbaf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,11 +27,11 @@ repos: - id: flake8 args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] -# - repo: https://github.com/asottile/add-trailing-comma -# rev: v2.2.3 -# hooks: -# - id: add-trailing-comma -# args: [--py36-plus] +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.2.3 + hooks: + - id: add-trailing-comma + args: [--py36-plus] # - repo: https://github.com/asottile/pyupgrade # rev: v2.37.2 From 
630756145d92eeb4454f9b39f1ab8f0f041f5f5b Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:16:56 +0530 Subject: [PATCH 11/22] uncommented the pre-commit yaml --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ed13fbbaf..1644075d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,11 +33,11 @@ repos: - id: add-trailing-comma args: [--py36-plus] -# - repo: https://github.com/asottile/pyupgrade -# rev: v2.37.2 -# hooks: -# - id: pyupgrade -# args: [--py38-plus] +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.2 + hooks: + - id: pyupgrade + args: [--py38-plus] # #- repo: https://github.com/pre-commit/mirrors-mypy From 1be42f6d52b7148c05b14bc7f14974953e6f9fbb Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:17:12 +0530 Subject: [PATCH 12/22] uncommented the pre-commit yaml --- .pre-commit-config.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1644075d8..44f0ab8e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,13 +45,13 @@ repos: # # hooks: # # - id: mypy -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black - -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From 4f4d8df108b89caee2fecd7dc209b2600a41afe6 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:36:37 +0530 Subject: [PATCH 13/22] Fix the dict syntax issues --- .pre-commit-config.yaml | 78 ++++++++++++++++++------------------ bin/functions/load_config.py | 5 +-- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 44f0ab8e4..218ac5331 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - -- repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -- repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - args: [--py36-plus] - -- repo: https://github.com/asottile/pyupgrade - rev: v2.37.2 - hooks: - - id: pyupgrade - args: [--py38-plus] - - -# #- repo: https://github.com/pre-commit/mirrors-mypy -# # rev: v0.961 -# # hooks: -# # - id: mypy - -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +# - repo: https://github.com/pre-commit/mirrors-autopep8 +# rev: v1.6.0 +# hooks: +# - id: autopep8 + +# - repo: https://github.com/PyCQA/flake8 +# rev: 4.0.1 +# hooks: +# - id: flake8 +# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +# - repo: 
https://github.com/asottile/add-trailing-comma +# rev: v2.2.3 +# hooks: +# - id: add-trailing-comma +# args: [--py36-plus] + +# - repo: https://github.com/asottile/pyupgrade +# rev: v2.37.2 +# hooks: +# - id: pyupgrade +# args: [--py38-plus] + + +# # #- repo: https://github.com/pre-commit/mirrors-mypy +# # # rev: v0.961 +# # # hooks: +# # # - id: mypy + +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black + +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort diff --git a/bin/functions/load_config.py b/bin/functions/load_config.py index d9e9bc047..1324e9f42 100755 --- a/bin/functions/load_config.py +++ b/bin/functions/load_config.py @@ -181,9 +181,8 @@ def parse_conf(conf_root, workload_config_file): def override_conf_from_environment(): # override values from os environment variable settings - for env_name, prop_name in ( - HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items() - ): + HiBenchEnvPropMappingMandatory.update(HiBenchEnvPropMapping.items()) + for env_name, prop_name in HiBenchEnvPropMappingMandatory.items(): # The overrides from environments has 2 premises, the second one is either # the prop_name is not set in advance by config files or the conf line # itself set an env variable to a hibench conf From 12a3216243a73a7ce852f1a5095037f83193d639 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:36:58 +0530 Subject: [PATCH 14/22] Uncommented --- .pre-commit-config.yaml | 78 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 218ac5331..44f0ab8e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -# - repo: https://github.com/pre-commit/mirrors-autopep8 -# rev: v1.6.0 -# hooks: -# - id: autopep8 - -# - repo: https://github.com/PyCQA/flake8 -# rev: 4.0.1 -# hooks: -# - id: flake8 -# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -# - repo: https://github.com/asottile/add-trailing-comma -# rev: v2.2.3 -# hooks: -# - id: add-trailing-comma -# args: [--py36-plus] - -# - repo: https://github.com/asottile/pyupgrade -# rev: v2.37.2 -# hooks: -# - id: pyupgrade -# args: [--py38-plus] - - -# # #- repo: https://github.com/pre-commit/mirrors-mypy -# # # rev: v0.961 -# # # hooks: -# # # - id: mypy - -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black - -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.6.0 + hooks: + - id: autopep8 + +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.2.3 + hooks: + - id: add-trailing-comma + args: [--py36-plus] + +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.2 + hooks: + - id: pyupgrade + args: [--py38-plus] + + +# #- repo: https://github.com/pre-commit/mirrors-mypy +# # rev: v0.961 +# # hooks: +# # - id: mypy + +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +- repo: 
https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From 374c6fa1ca8cc25b373948d6522a85603f90baa2 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:49:13 +0530 Subject: [PATCH 15/22] Fix the dict syntax issues --- .pre-commit-config.yaml | 62 ++++++++++++++++++------------------ bin/functions/load_config.py | 13 ++++---- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 44f0ab8e4..8d7c85513 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,28 +16,28 @@ repos: - id: reorder-python-imports args: [--py38-plus] -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - -- repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -- repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - args: [--py36-plus] - -- repo: https://github.com/asottile/pyupgrade - rev: v2.37.2 - hooks: - - id: pyupgrade - args: [--py38-plus] +# - repo: https://github.com/pre-commit/mirrors-autopep8 +# rev: v1.6.0 +# hooks: +# - id: autopep8 + +# - repo: https://github.com/PyCQA/flake8 +# rev: 4.0.1 +# hooks: +# - id: flake8 +# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +# - repo: https://github.com/asottile/add-trailing-comma +# rev: v2.2.3 +# hooks: +# - id: add-trailing-comma +# args: [--py36-plus] + +# - repo: https://github.com/asottile/pyupgrade +# rev: v2.37.2 +# hooks: +# - id: pyupgrade +# args: [--py38-plus] # #- repo: https://github.com/pre-commit/mirrors-mypy @@ -45,13 +45,13 @@ repos: # # hooks: # # - id: mypy -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort diff --git a/bin/functions/load_config.py b/bin/functions/load_config.py index 1324e9f42..e9d32c5cc 100755 --- a/bin/functions/load_config.py +++ b/bin/functions/load_config.py @@ -199,7 +199,8 @@ def override_conf_from_environment(): def override_conf_by_paching_conf(): # override values from os environment variable settings - # for env_name, prop_name in HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items(): + # HiBenchEnvPropMappingMandatory.update(HiBenchEnvPropMapping.items()) + # for env_name, prop_name in HiBenchEnvPropMappingMandatory.items(): # if env_name in os.environ: # env_value = os.getenv(env_name) # HibenchConf[prop_name] = env_value @@ -247,9 +248,8 @@ def check_config(): # check configures "Mandatory configure missing: %s" % prop_name ) # Ensure all ref values in configure has been expanded - for _, prop_name in ( - HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items() - ): + HiBenchEnvPropMappingMandatory.update(HiBenchEnvPropMapping.items()) + for _, prop_name in HiBenchEnvPropMappingMandatory.items(): assert "${" not in HibenchConf.get( prop_name, "", @@ -784,9 +784,8 @@ 
def export_config(workload_name, framework_name): # generate configure for hibench sources = defaultdict(list) - for env_name, prop_name in ( - HiBenchEnvPropMappingMandatory.items() + HiBenchEnvPropMapping.items() - ): + HiBenchEnvPropMappingMandatory.update(HiBenchEnvPropMapping.items()) + for env_name, prop_name in HiBenchEnvPropMappingMandatory.items(): source = HibenchConfRef.get(prop_name, "None") sources[source].append( "{}={}".format( From d22a887a1d91f138d7c32c5125d06b4392b69f7a Mon Sep 17 00:00:00 2001 From: root Date: Sun, 24 Jul 2022 23:49:30 +0530 Subject: [PATCH 16/22] Uncommented --- .pre-commit-config.yaml | 78 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d7c85513..d1b6fe921 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -# - repo: https://github.com/pre-commit/mirrors-autopep8 -# rev: v1.6.0 -# hooks: -# - id: autopep8 - -# - repo: https://github.com/PyCQA/flake8 -# rev: 4.0.1 -# hooks: -# - id: flake8 -# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -# - repo: https://github.com/asottile/add-trailing-comma -# rev: v2.2.3 -# hooks: -# - id: add-trailing-comma -# args: [--py36-plus] - -# - repo: https://github.com/asottile/pyupgrade -# rev: v2.37.2 -# hooks: -# - id: pyupgrade -# args: [--py38-plus] - - -# #- repo: https://github.com/pre-commit/mirrors-mypy -# # rev: v0.961 -# # hooks: -# # - id: mypy - -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black - -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.6.0 + hooks: + - id: autopep8 + +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.2.3 + hooks: + - id: add-trailing-comma + args: [--py36-plus] + +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.2 + hooks: + - id: pyupgrade + args: [--py38-plus] + + +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v0.961 +# hooks: +# - id: mypy + +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From 404015dc8f89c3714b95f3a2cfbfa201c42d1008 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 25 Jul 2022 11:53:49 +0530 Subject: [PATCH 17/22] Change the syntax to support python3.8 --- .pre-commit-config.yaml | 20 ++++++++++---------- bin/functions/execute_with_log.py | 2 +- bin/functions/monitor.py | 24 ++++++++++++++---------- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d1b6fe921..e03b800d6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,13 +45,13 @@ repos: # hooks: # - id: mypy -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts 
between black and isort +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black + +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort diff --git a/bin/functions/execute_with_log.py b/bin/functions/execute_with_log.py index d9b7f8076..6d80c1aae 100755 --- a/bin/functions/execute_with_log.py +++ b/bin/functions/execute_with_log.py @@ -128,7 +128,7 @@ def execute(workload_result_file, command_lines): try: line = line.rstrip() - log_file.write(line + "\n") + log_file.write(str(line) + "\n") log_file.flush() except KeyboardInterrupt: proc.terminate() diff --git a/bin/functions/monitor.py b/bin/functions/monitor.py index 176742b96..6e50901b5 100755 --- a/bin/functions/monitor.py +++ b/bin/functions/monitor.py @@ -307,7 +307,7 @@ def commit(self, timestamp, header, stat): # print(stat_delta) stat_delta[header + "/total"] = reduce_patched( lambda a, b: a._add(b, "total"), - stat_delta.values(), + list(stat_delta.values()), ) self.rproc.aggregate(timestamp, stat_delta) @@ -617,13 +617,13 @@ def commit_aggregate(self, node, datas): f.write(repr(datas) + "\n") def run(self): - for v in self.node_pool.values(): + for v in list(self.node_pool.values()): v.start() def stop(self): - for v in self.node_pool.values(): + for v in list(self.node_pool.values()): v.stop() - for v in self.node_pool.values(): + for v in list(self.node_pool.values()): v.join() @@ -649,7 +649,7 @@ def round_to_base(v, b): def filter_dict_with_prefix(d, prefix, sort=True): - keys = sorted(d.keys()) if sort else d.keys() + keys = sorted(d.keys()) if sort else list(d.keys()) if prefix[0] == "!": return {x: d[x] for x in keys if not x.startswith(prefix[1:])} else: @@ -911,8 +911,9 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): # all cpu cores, total cluster summed1 = [ x["cpu/total"] - for x in data_by_all_hosts if x.has_key("cpu/total") + for x in data_by_all_hosts if "cpu/total" in x ] + if summed1: summed = reduce_patched( lambda a, b: a._add(b), @@ -959,8 +960,10 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): # all disk of each node, total cluster summed1 = [ - x["disk/total"] for x in data_by_all_hosts if x.has_key("disk/total") + x["disk/total"] + for x in data_by_all_hosts if "disk/total" in x ] + if summed1: summed = reduce_patched(lambda a, b: a._add(b), summed1) for x in data_by_all_hosts: @@ -1003,7 +1006,8 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): # memory of each node, total cluster summed1 = [ - x["memory/total"] for x in data_by_all_hosts if x.has_key("memory/total") + x["memory/total"] + for x in data_by_all_hosts if "memory/total" in x ] if summed1: summed = reduce_patched(lambda a, b: a._add(b), summed1) @@ -1044,7 +1048,7 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): ) # proc of each node, total cluster - summed1 = [x["proc"] for x in data_by_all_hosts if x.has_key("proc")] + summed1 = [x["proc"] for x in data_by_all_hosts if "proc" in x] if summed1: summed = reduce_patched(lambda a, b: a._add(b), summed1) for x in data_by_all_hosts: @@ -1083,7 +1087,7 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): # all network interface, total cluster summed1 = [ x["net/total"] - for x in data_by_all_hosts if x.has_key("net/total") + for x in data_by_all_hosts if "net/total" in x ] if summed1: From adb863d07a08b1882710918a537a12f68596a18a Mon Sep 
17 00:00:00 2001 From: root Date: Mon, 25 Jul 2022 12:08:31 +0530 Subject: [PATCH 18/22] Uncommented --- .pre-commit-config.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e03b800d6..d1b6fe921 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,13 +45,13 @@ repos: # hooks: # - id: mypy -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black - -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From c0f848c2f6d1b330ef27c457f0c201593e8735a1 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 25 Jul 2022 14:23:08 +0530 Subject: [PATCH 19/22] Debug statement --- .pre-commit-config.yaml | 78 ++++++++++++++++++------------------ bin/functions/load_config.py | 2 + 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d1b6fe921..8d7c85513 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - -- repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -- repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - args: [--py36-plus] - -- repo: https://github.com/asottile/pyupgrade - rev: v2.37.2 - hooks: - - id: pyupgrade - args: [--py38-plus] - - -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: v0.961 -# hooks: -# - id: mypy - -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +# - repo: https://github.com/pre-commit/mirrors-autopep8 +# rev: v1.6.0 +# hooks: +# - id: autopep8 + +# - repo: https://github.com/PyCQA/flake8 +# rev: 4.0.1 +# hooks: +# - id: flake8 +# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +# - repo: https://github.com/asottile/add-trailing-comma +# rev: v2.2.3 +# hooks: +# - id: add-trailing-comma +# args: [--py36-plus] + +# - repo: https://github.com/asottile/pyupgrade +# rev: v2.37.2 +# hooks: +# - id: pyupgrade +# args: [--py38-plus] + + +# #- repo: https://github.com/pre-commit/mirrors-mypy +# # rev: v0.961 +# # hooks: +# # - id: mypy + +# - repo: https://github.com/psf/black +# rev: 22.3.0 +# hooks: +# - id: black + +# - repo: https://github.com/PyCQA/isort +# rev: 5.10.1 +# hooks: +# - id: isort +# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort diff --git a/bin/functions/load_config.py b/bin/functions/load_config.py index e9d32c5cc..32cde7d4d 100755 --- a/bin/functions/load_config.py +++ b/bin/functions/load_config.py @@ -244,6 +244,8 @@ def load_config(conf_root, workload_config_file, workload_folder, patching_confi def check_config(): 
# check configures # Ensure mandatory configures are available for _, prop_name in HiBenchEnvPropMappingMandatory.items(): + print(HibenchConf.get(prop_name)) + print(HiBenchEnvPropMappingMandatory.items()) assert HibenchConf.get(prop_name, None) is not None, ( "Mandatory configure missing: %s" % prop_name ) From e97f6b044e2bdb598f1d5e9a653a67e321566feb Mon Sep 17 00:00:00 2001 From: root Date: Mon, 25 Jul 2022 14:24:01 +0530 Subject: [PATCH 20/22] uncommented --- .pre-commit-config.yaml | 78 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8d7c85513..d1b6fe921 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,42 +16,42 @@ repos: - id: reorder-python-imports args: [--py38-plus] -# - repo: https://github.com/pre-commit/mirrors-autopep8 -# rev: v1.6.0 -# hooks: -# - id: autopep8 - -# - repo: https://github.com/PyCQA/flake8 -# rev: 4.0.1 -# hooks: -# - id: flake8 -# args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -# - repo: https://github.com/asottile/add-trailing-comma -# rev: v2.2.3 -# hooks: -# - id: add-trailing-comma -# args: [--py36-plus] - -# - repo: https://github.com/asottile/pyupgrade -# rev: v2.37.2 -# hooks: -# - id: pyupgrade -# args: [--py38-plus] - - -# #- repo: https://github.com/pre-commit/mirrors-mypy -# # rev: v0.961 -# # hooks: -# # - id: mypy - -# - repo: https://github.com/psf/black -# rev: 22.3.0 -# hooks: -# - id: black - -# - repo: https://github.com/PyCQA/isort -# rev: 5.10.1 -# hooks: -# - id: isort -# args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.6.0 + hooks: + - id: autopep8 + +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] + +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.2.3 + hooks: + - id: add-trailing-comma + args: [--py36-plus] + +- repo: https://github.com/asottile/pyupgrade + rev: v2.37.2 + hooks: + - id: pyupgrade + args: [--py38-plus] + + +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v0.961 +# hooks: +# - id: mypy + +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + +- repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort From 3dd66924f2fe0a819f84fb77a7d29cffc35cc97e Mon Sep 17 00:00:00 2001 From: root Date: Mon, 25 Jul 2022 14:33:19 +0530 Subject: [PATCH 21/22] removed the import --- bin/functions/execute_with_log.py | 2 -- bin/functions/monitor.py | 25 +++++-------------------- bin/functions/terminalsize.py | 2 -- bin/functions/test_load_config.py | 5 +---- 4 files changed, 6 insertions(+), 28 deletions(-) diff --git a/bin/functions/execute_with_log.py b/bin/functions/execute_with_log.py index 6d80c1aae..d20428e35 100755 --- a/bin/functions/execute_with_log.py +++ b/bin/functions/execute_with_log.py @@ -13,8 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import annotations - import fnmatch import os import re diff --git a/bin/functions/monitor.py b/bin/functions/monitor.py index 6e50901b5..299654706 100755 --- a/bin/functions/monitor.py +++ b/bin/functions/monitor.py @@ -13,8 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import annotations - import os import re import signal @@ -860,8 +858,7 @@ def parse_bench_log(benchlog_fn): cur = events[i].split(",")[0] next = events[i + 1].split(",")[0] if abs(int(cur) / 1000 - int(next) / 1000) < 1: - events[i] = events[i] + "
" + \ - re.split(",", events[i + 1], 1)[1] + events[i] = events[i] + "
" + re.split(",", events[i + 1], 1)[1] del events[i + 1] continue i += 1 @@ -909,10 +906,7 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): data_by_all_hosts = [classed_by_host.get(h, {}) for h in all_hosts] # all cpu cores, total cluster - summed1 = [ - x["cpu/total"] - for x in data_by_all_hosts if "cpu/total" in x - ] + summed1 = [x["cpu/total"] for x in data_by_all_hosts if "cpu/total" in x] if summed1: summed = reduce_patched( @@ -959,10 +953,7 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): ) # all disk of each node, total cluster - summed1 = [ - x["disk/total"] - for x in data_by_all_hosts if "disk/total" in x - ] + summed1 = [x["disk/total"] for x in data_by_all_hosts if "disk/total" in x] if summed1: summed = reduce_patched(lambda a, b: a._add(b), summed1) @@ -1005,10 +996,7 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): ) # memory of each node, total cluster - summed1 = [ - x["memory/total"] - for x in data_by_all_hosts if "memory/total" in x - ] + summed1 = [x["memory/total"] for x in data_by_all_hosts if "memory/total" in x] if summed1: summed = reduce_patched(lambda a, b: a._add(b), summed1) for x in data_by_all_hosts: @@ -1085,10 +1073,7 @@ def generate_report(workload_title, log_fn, benchlog_fn, report_fn): ) # all network interface, total cluster - summed1 = [ - x["net/total"] - for x in data_by_all_hosts if "net/total" in x - ] + summed1 = [x["net/total"] for x in data_by_all_hosts if "net/total" in x] if summed1: summed = reduce_patched(lambda a, b: a._add(b), summed1) diff --git a/bin/functions/terminalsize.py b/bin/functions/terminalsize.py index 0871612d1..b3495b078 100644 --- a/bin/functions/terminalsize.py +++ b/bin/functions/terminalsize.py @@ -2,8 +2,6 @@ """ from https://gist.github.com/jtriley/1108174 """ -from __future__ import annotations - import os import platform import shlex diff --git a/bin/functions/test_load_config.py b/bin/functions/test_load_config.py index 59ac97e66..e529133ad 100644 --- a/bin/functions/test_load_config.py +++ b/bin/functions/test_load_config.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import fnmatch import glob import os @@ -377,8 +375,7 @@ def tearDown(self): def test_probe_java_opts(self): mapred_site_path = ( - load_config.HibenchConf["hibench.hadoop.configure.dir"] + - "/mapred-site.xml" + load_config.HibenchConf["hibench.hadoop.configure.dir"] + "/mapred-site.xml" ) mapred_site_content = "mapreduce.map.java.opts-Xmx1536M -DpreferIPv4Stack=truemapreduce.reduce.java.opts-Xmx1536M -DpreferIPv4Stack=true" From fda82db3065d8cff2ee3f40d1f8d908d3bda6ce1 Mon Sep 17 00:00:00 2001 From: Sreenivasulu Reddy Sura <100401235+sreenivasulureddysura@users.noreply.github.com> Date: Mon, 25 Jul 2022 14:43:10 +0530 Subject: [PATCH 22/22] Delete .pre-commit-config.yaml --- .pre-commit-config.yaml | 57 ----------------------------------------- 1 file changed, 57 deletions(-) delete mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index d1b6fe921..000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,57 +0,0 @@ -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: debug-statements - # - id: double-quote-string-fixer - - id: name-tests-test - # - id: requirements-txt-fixer - -- repo: https://github.com/asottile/reorder_python_imports - rev: v3.8.2 - hooks: - - id: 
reorder-python-imports - args: [--py38-plus] - -- repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - -- repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--ignore=E501,W601,F401,E226,E722,E265,W504,E711,F841,E741,F821,E713,W503,F811"] - -- repo: https://github.com/asottile/add-trailing-comma - rev: v2.2.3 - hooks: - - id: add-trailing-comma - args: [--py36-plus] - -- repo: https://github.com/asottile/pyupgrade - rev: v2.37.2 - hooks: - - id: pyupgrade - args: [--py38-plus] - - -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: v0.961 -# hooks: -# - id: mypy - -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black - -- repo: https://github.com/PyCQA/isort - rev: 5.10.1 - hooks: - - id: isort - args: ["--py=38", "--profile", "black", "--filter-files"] # solves conflicts between black and isort
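
A recurring change across the load_config.py and hibench_prop_env_mapping.py hunks above is that Python 3 dict views no longer support the "+" concatenation the Python 2 code relied on. The following is a rough, self-contained sketch of those idioms only: the two mappings are hypothetical two-entry stand-ins, not the real HiBench dictionaries, and itertools.chain is shown as a non-mutating alternative to the dict.update() approach the patches themselves take.

    # Minimal sketch of the Python 3 mapping idioms touched above; the two
    # dicts here are illustrative stand-ins, not the real
    # HiBenchEnvPropMapping* dictionaries.
    from itertools import chain

    mandatory = {"JAVA_BIN": "java.bin", "HADOOP_HOME": "hibench.hadoop.home"}
    optional = {"SPARK_HOME": "hibench.spark.home"}

    # Python 2 allowed mandatory.items() + optional.items(); in Python 3,
    # items() returns a view and "+" raises TypeError.  chain() walks both
    # mappings without mutating either (the patches above instead fold the
    # optional entries into the mandatory dict with update()).
    for env_name, prop_name in chain(mandatory.items(), optional.items()):
        print(env_name, "->", prop_name)

    # Inverting a mapping with a dict comprehension, as in
    # hibench_prop_env_mapping.py; wrapping items() in list() is not needed
    # in Python 3, iterating the view directly is enough.
    prop_to_env = {prop: env for env, prop in mandatory.items()}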
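
The monitor.py hunks likewise replace the removed dict.has_key() method with the "in" operator and take reduce from functools. A small sketch of those two idioms, using made-up per-host samples and plain addition in place of the module's reduce_patched/_add helpers, might look like this:

    # Minimal sketch of the membership-test and reduce changes in monitor.py;
    # the per-host dicts are fabricated samples, not real monitor output.
    from functools import reduce  # reduce is no longer a builtin in Python 3

    data_by_all_hosts = [
        {"cpu/total": 4.0, "memory/total": 1.5},
        {"memory/total": 2.5},
    ]

    # dict.has_key(k) was removed in Python 3; "k in d" works in both versions.
    summed1 = [x["cpu/total"] for x in data_by_all_hosts if "cpu/total" in x]

    # reduce() accepts any iterable, so the explicit list() some hunks add
    # around dict views is optional; it only matters when the values must be
    # indexed or iterated more than once.
    if summed1:
        total = reduce(lambda a, b: a + b, summed1)
        print(total)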