forked from tddschn/easygraph-bench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfill_excel.py
executable file
·182 lines (161 loc) · 6.47 KB
/
fill_excel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
Author : Xinyuan Chen <45612704+tddschn@users.noreply.github.com>
Date : 2022-09-23
Purpose: Fill Min Gao's bench results template Excel file
"""
import argparse
from pathlib import Path
from config import (
tool_name_mapping,
dataset_homepage_mapping,
bench_results_table_name,
graph_info_table_name,
)
import openpyxl
from openpyxl.cell.cell import Cell, MergedCell
from openpyxl.worksheet.worksheet import Worksheet
from utils_db import get_graph_property_to_excel_field_mapping
import sqlite3
from copy import copy
bench_results_sqlite_db_path = Path(__file__).parent / 'bench-results.db'
template_workbook_path = 'output/easygraph-benchmark-results-template.xlsx'
new_workbook_path = 'output/easygraph-benchmark-results.xlsx'
dataset_name_col = 'A'
tool_col = 'A'
# cSpell:disable
avg_time_cols = list('BCDEF')
graph_property_cols = list('GHIJK')
# cSpell:enable
def copy_cell_style(cell: Cell) -> dict:
return {
'font': copy(cell.font),
'border': copy(cell.border),
'fill': copy(cell.fill),
'number_format': copy(cell.number_format),
'protection': copy(cell.protection),
'alignment': copy(cell.alignment),
}
def get_args():
"""Get command-line arguments"""
parser = argparse.ArgumentParser(
description='Fill Min Gao\'s bench results template Excel file',
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
'-t',
'--template-file',
help='Template Excel file',
metavar='PATH',
type=str,
default=template_workbook_path,
)
parser.add_argument(
'-o',
'--output-file',
help='Output Excel file',
metavar='PATH',
type=str,
default=new_workbook_path,
)
parser.add_argument(
'-G', '--fill-graph-info-only', help='Fill graph info only', action='store_true'
)
return parser.parse_args()
def get_dataset_name_to_row_number_mapping(worksheet: Worksheet) -> dict[str, int]:
rd = worksheet.row_dimensions
max_row = len(rd)
row_range = range(1, max_row + 1, 5)
dataset_name_to_row_number_mapping: dict[str, int] = {
worksheet[f'{dataset_name_col}{str(row_number)}'].value: row_number
for row_number in row_range
}
return dataset_name_to_row_number_mapping
def query_average_time(
cursor: sqlite3.Cursor, database_name: str, tool_abbr: str, method: str
) -> float:
query = f"""
SELECT "average_time" FROM "{bench_results_table_name}" WHERE "dataset" = :dataset AND "tool" = :tool_abbr and "method" = :method ORDER BY "iteration_count" DESC, "id" DESC LIMIT 1
"""
cursor.execute(
query, {'dataset': database_name, 'tool_abbr': tool_abbr, 'method': method}
)
try:
return float(cursor.fetchone()[0])
except:
print(
f'No result for {database_name}, {tool_abbr}, {method}, filled with -100.0 .'
)
# raise
return -100.0
def main() -> None:
args = get_args()
workbook = openpyxl.load_workbook(args.template_file)
print(f'Loaded template file: {args.template_file}')
worksheet = workbook.active
dataset_name_to_row_number_mapping = get_dataset_name_to_row_number_mapping(
worksheet=worksheet
)
with sqlite3.connect(bench_results_sqlite_db_path) as conn:
cursor = conn.cursor()
for dataset_name, row_number in dataset_name_to_row_number_mapping.items():
# add hyperlinks to the dataset name cells
if (
dataset_name in dataset_homepage_mapping
and not args.fill_graph_info_only
):
dataset_homepage = dataset_homepage_mapping[dataset_name]
cell = worksheet[f'{dataset_name_col}{str(row_number)}']
dataset_name_cell_styles = copy_cell_style(cell)
cell.hyperlink = dataset_homepage
cell.value = dataset_name
cell.style = 'Hyperlink'
for k, v in dataset_name_cell_styles.items():
setattr(cell, k, v)
# fill cells
for i, rn in enumerate(range(row_number + 1, row_number + 4)):
# fill avg times
for cn in avg_time_cols:
if args.fill_graph_info_only:
continue
cell = worksheet[f'{cn}{str(rn)}']
if isinstance(cell, MergedCell) or cell.value == 'N/A':
# print(f'skipped cell {cell}')
continue
tool = worksheet[f'{tool_col}{str(rn)}'].value
tool_abbr = tool_name_mapping[tool]
method = worksheet[f'{cn}{str(row_number)}'].value
cell.value = query_average_time(
cursor, dataset_name, tool_abbr, method
)
for k, v in copy_cell_style(worksheet['L2']).items():
setattr(cell, k, v)
# fill graph properties
if i == 0:
graph_property_to_excel_field_mapping = (
get_graph_property_to_excel_field_mapping()
)
excel_field_to_graph_property_mapping = {
v: k for k, v in graph_property_to_excel_field_mapping.items()
}
for cn in graph_property_cols:
cell = worksheet[f'{cn}{str(rn)}']
# if not isinstance(cell, MergedCell):
# print(f'skipped cell {cell}')
# continue
excel_graph_property = worksheet[f'{cn}{str(row_number)}'].value
graph_property = excel_field_to_graph_property_mapping[
excel_graph_property
]
query = f"""
SELECT "{graph_property}" FROM "{graph_info_table_name}" WHERE "dataset" = :dataset
"""
cursor.execute(query, {'dataset': dataset_name})
cell.value = cursor.fetchone()[0]
# print(f'filled cell {cell} with {cell.value}')
for k, v in copy_cell_style(worksheet['L2']).items():
setattr(cell, k, v)
workbook.save(args.output_file)
print(f'Saved new file: {args.output_file}')
if __name__ == '__main__':
main()