Skip to content

Commit c6d1b0b

Browse files
committed
Squashed commit of the following:
commit de8d27583852ee7efd80fd3c311e0f3b24225401 Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Sun Oct 22 02:49:51 2023 +0200 Parallel builds with concatenated .cpp commit 21ca805 Author: github action <action@example.com> Date: Fri Jul 26 10:56:23 2024 +0000 Apply 'make format' commit 4598781 Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 12:37:30 2024 +0200 Rename to m_concatenatedFilename Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit 70b76ec Author: Bartłomiej Chmiel <bachm44@gmail.com> Date: Fri Jul 26 12:33:25 2024 +0200 Use std::isspace Co-authored-by: Wilson Snyder <wsnyder@wsnyder.org> commit 1d654e1 Author: Bartłomiej Chmiel <bachm44@gmail.com> Date: Fri Jul 26 12:31:20 2024 +0200 Remove braces in the if statement Co-authored-by: Wilson Snyder <wsnyder@wsnyder.org> commit b00d034 Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 11:55:59 2024 +0200 Use explicit types Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit 0d70bcc Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 11:24:11 2024 +0200 Rename FilenameWithScore Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit 60c5705 Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 11:15:41 2024 +0200 Add 'm_' prefix for members, use '=' for primitive assignment Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit 150ab28 Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 10:44:34 2024 +0200 Minimize disabled warnings in gen_group_scores Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit cacc8e5 Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 10:38:20 2024 +0200 Rename script to gen_group_scores Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit aaebd3e Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 10:35:16 2024 +0200 Put --output-groups into sorted position Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> 
commit 9a58ee0 Author: Bartłomiej Chmiel <bchmiel@antmicro.com> Date: Fri Jul 26 10:34:15 2024 +0200 Rename to --output-groups Signed-off-by: Bartłomiej Chmiel <bchmiel@antmicro.com> commit b7738da Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 12 20:01:09 2024 +0200 Rename `--output-split-jobs` to `--output-concatenation-groups` commit 047f9da Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 12 15:32:44 2024 +0200 Use threshold based on score and number of buckets instead of standard deviation commit 0265126 Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 5 14:06:28 2024 +0200 V3EmitMk: Emit concatenating files and Makefile entries commit 9c9486c Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 12 16:33:58 2024 +0200 Add test for `--output-split-jobs` commit c44b817 Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 5 13:37:12 2024 +0200 V3EmitMk: `--output-split-jobs` file grouping implementation commit 37a36a4 Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 5 12:54:14 2024 +0200 V3EmitC*: Increase output file score based on generated fragments of code commit b4a3a94 Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 5 12:48:21 2024 +0200 V3EmitC*: Store AstCFile associated with current V3OutCFile commit b1f7afa Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Thu Jul 11 16:00:18 2024 +0200 Add tests for `--output-split-jobs` flag errors. commit b8528d6 Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 5 12:05:36 2024 +0200 Add `--output-split-jobs` option commit cb4693d Author: Mariusz Glebocki <mglebocki@antmicro.com> Date: Fri Jul 5 11:36:32 2024 +0200 Add data fields. No functional changes.
1 parent b1927e4 commit c6d1b0b

26 files changed

+1933
-92
lines changed

Makefile.in

+1
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ PY_PROGRAMS = \
447447
nodist/dot_importer \
448448
nodist/fuzzer/actual_fail \
449449
nodist/fuzzer/generate_dictionary \
450+
nodist/gen_group_scores \
450451
nodist/install_test \
451452
nodist/log_changes \
452453

bin/verilator

+1
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ detailed descriptions of these arguments.
405405
-O3 High-performance optimizations
406406
-O<optimization-letter> Selectable optimizations
407407
-o <executable> Name of final executable
408+
--output-groups <value> Group output files
408409
--output-split <statements> Split .cpp files into pieces
409410
--output-split-cfuncs <statements> Split model functions
410411
--output-split-ctrace <statements> Split tracing functions

bin/verilator_includer2

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env python3
2+
# pylint: disable=C0114,C0209
3+
#
4+
# Copyright 2003-2023 by Wilson Snyder. This program is free software; you
5+
# can redistribute it and/or modify the Verilator internals under the terms
6+
# of either the GNU Lesser General Public License Version 3 or the Perl
7+
# Artistic License Version 2.0.
8+
#
9+
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
10+
######################################################################
11+
12+
import sys
13+
from dataclasses import dataclass, field
14+
from pathlib import Path
15+
16+
@dataclass
class BucketData:
    """One output bucket: its 1-based id, accumulated byte size and member files.

    Instances are iterable so they can be unpacked as
    ``bucket_id, size, filenames = bucket``.
    """
    # Identifier substituted into the output file name template.
    bucket_id: int
    # Sum of the sizes of all files assigned to this bucket so far.
    size: int = 0
    # Names of the files assigned to this bucket, in assignment order.
    filenames: list[str] = field(default_factory=list)

    def __iter__(self):
        """Iterate over ``(bucket_id, size, filenames)`` in that order."""
        fields_in_order = (self.bucket_id, self.size, self.filenames)
        return iter(fields_in_order)
24+
25+
26+
def get_list(fn: Path) -> tuple[int, list[tuple[int, Path]]]:
    """Read a whitespace-separated list of file names and measure each file.

    Args:
        fn: Path of a text file whose content is file names separated by
            any whitespace (spaces, tabs, newlines).

    Returns:
        ``(total_size, entries)`` where ``entries`` contains one
        ``(size_in_bytes, path)`` tuple per listed file, in listing order,
        and ``total_size`` is the sum of all those sizes.

    Raises:
        OSError: If the list file cannot be opened or a listed file cannot
            be stat'ed.
    """
    with fn.open("r") as list_file:
        # str.split() without a separator never yields empty or padded
        # tokens, so no extra filtering or stripping is needed.
        names = list_file.read().split()

    result: list[tuple[int, Path]] = [(path.stat().st_size, path) for path in map(Path, names)]
    total_size = sum(size for size, _ in result)
    return (total_size, result)
41+
42+
43+
def main():
    """Split a list of source files into N size-balanced "include" files.

    Command line: ``<input-list-file> <buckets-count> <output-name-template>``

    * ``input-list-file``: text file with whitespace-separated file names.
    * ``buckets-count``: positive number of output files to produce.
    * ``output-name-template``: output file name; every ``%`` is replaced
      with the 1-based bucket id.

    Files are assigned to buckets in listing order. A bucket keeps taking the
    next file while doing so brings its size closer to the ideal bucket size;
    an empty bucket always takes at least one file. Anything left over after
    the last bucket was considered is appended to the last bucket. Each output
    file contains one ``#include <file>`` line per assigned file.

    Returns:
        0 on success.

    Raises:
        SystemExit: If fewer than three arguments were given.
        ValueError: If the buckets count is not a positive integer or the
            template lacks a ``%`` placeholder.
    """
    # Local import: only this entry point needs it.
    from collections import deque

    if len(sys.argv) < 4:
        raise SystemExit(
            f"Usage: {sys.argv[0]} <input-list-file> <buckets-count> <output-name-template>")

    input_list_file = Path(sys.argv[1])
    buckets_count = int(sys.argv[2])
    if buckets_count <= 0:
        raise ValueError(f"Arg 2: Expected buckets count, got: {sys.argv[2]}")
    output_name_template = sys.argv[3]
    if "%" not in output_name_template:
        raise ValueError(f"Arg 3: template must contain '%': {sys.argv[3]}")

    total_size, input_list = get_list(input_list_file)

    # First estimate: spread everything evenly.
    ideal_bucket_size = total_size // buckets_count

    # Files larger than the first estimate are counted separately, and the
    # ideal size is recomputed from the remaining bytes and bucket count.
    huge_sizes = [size for size, _ in input_list if size > ideal_bucket_size]
    huge_files_num = len(huge_sizes)
    huge_files_size = sum(huge_sizes)

    ideal_bucket_size = max(1, total_size - huge_files_size) // max(
        1, buckets_count - huge_files_num)

    # A deque gives O(1) removal from the front (list.pop(0) is O(n)).
    pending = deque(input_list)
    buckets: list[BucketData] = [BucketData(i + 1) for i in range(buckets_count)]
    for bucket in buckets:
        while pending:
            next_size, next_fn = pending[0]
            diff_now = abs(ideal_bucket_size - bucket.size)
            diff_next = abs(ideal_bucket_size - bucket.size - next_size)
            # Stop filling once adding the next file no longer moves the
            # bucket closer to the ideal size (never leave a bucket empty).
            if bucket.size != 0 and diff_now <= diff_next:
                break
            bucket.size += next_size
            bucket.filenames.append(str(next_fn))
            pending.popleft()

    # Whatever did not fit anywhere goes into the last bucket.
    last_bucket = buckets[-1]
    for next_size, next_fn in pending:
        last_bucket.size += next_size
        last_bucket.filenames.append(str(next_fn))

    for bucket_id, _size, filenames in sorted(buckets, key=lambda b: b.size, reverse=True):
        output_list_file = Path(output_name_template.replace("%", str(bucket_id)))
        with output_list_file.open("w") as out:
            out.write("\n".join([f"#include <{fn}>" for fn in filenames]) + "\n")

    return 0


if __name__ == "__main__":
    sys.exit(main())

include/verilated.mk.in

+38-10
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ CFG_LDLIBS_THREADS = @CFG_LDLIBS_THREADS@
5151

5252
VERILATOR_COVERAGE = $(PERL) $(VERILATOR_ROOT)/bin/verilator_coverage
5353
VERILATOR_INCLUDER = $(PYTHON3) $(VERILATOR_ROOT)/bin/verilator_includer
54+
VERILATOR_INCLUDER2 = $(PYTHON3) $(VERILATOR_ROOT)/bin/verilator_includer2
5455
VERILATOR_CCACHE_REPORT = $(PYTHON3) $(VERILATOR_ROOT)/bin/verilator_ccache_report
5556

5657
######################################################################
@@ -200,20 +201,46 @@ VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW))
200201
$(VK_GLOBAL_OBJS): $(VM_PREFIX).mk
201202

202203
ifneq ($(VM_PARALLEL_BUILDS),1)
203-
# Fast build for small designs: All .cpp files in one fell swoop. This
204-
# saves total compute, but can be slower if only a little changes. It is
205-
# also a lot slower for medium to large designs when the speed of the C
206-
# compiler dominates, which in this mode is not parallelizable.
204+
# Fast build for small designs: All .cpp files in one fell swoop. This
205+
# saves total compute, but can be slower if only a little changes. It is
206+
# also a lot slower for medium to large designs when the speed of the C
207+
# compiler dominates, which in this mode is not parallelizable.
207208

208-
VK_OBJS += $(VM_PREFIX)__ALL.o
209-
$(VM_PREFIX)__ALL.cpp: $(addsuffix .cpp, $(VM_FAST) $(VM_SLOW))
209+
VK_OBJS += $(VM_PREFIX)__ALL.o
210+
211+
$(VM_PREFIX)__ALL.cpp: $(addsuffix .cpp, $(VM_FAST) $(VM_SLOW))
210212
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@
211-
all_cpp: $(VM_PREFIX)__ALL.cpp
213+
214+
.PHONY: all_cpp
215+
all_cpp: $(VM_PREFIX)__ALL.cpp
212216
else
213-
# Parallel build: Each .cpp file by itself. This can be somewhat slower for
214-
# very small designs and examples, but is a lot faster for large designs.
215217

216-
VK_OBJS += $(VK_OBJS_FAST) $(VK_OBJS_SLOW)
218+
# TODO(mglb): rename to something related to .cpp files concatenation
219+
# VM_PARALLEL_JOBS ?= 20
220+
221+
ifneq ($(filter-out 0 1,$(VM_PARALLEL_JOBS)),)
222+
223+
_VK_JOB_IDS := $(shell seq 1 ${VM_PARALLEL_JOBS})
224+
225+
.INTERMEDIATE: fast.list slow.list
226+
fast.list: $(VM_FAST:%=%.cpp)
227+
slow.list: $(VM_SLOW:%=%.cpp)
228+
229+
fast.list slow.list:
230+
$(file >$@,$(strip $^))
231+
232+
$(foreach id,$(_VK_JOB_IDS),%_$(id)_$(VM_PARALLEL_JOBS).cpp): %.list
233+
$(VERILATOR_INCLUDER2) $< ${VM_PARALLEL_JOBS} $(<:%.list=%)_%_${VM_PARALLEL_JOBS}.cpp
234+
235+
VK_OBJS_FAST = $(foreach job_id,${_VK_JOB_IDS},fast_${job_id}_${VM_PARALLEL_JOBS}.o)
236+
VK_OBJS_SLOW = $(foreach job_id,${_VK_JOB_IDS},slow_${job_id}_${VM_PARALLEL_JOBS}.o)
237+
238+
endif
239+
240+
# Parallel build: Each .cpp file by itself. This can be somewhat slower for
241+
# very small designs and examples, but is a lot faster for large designs.
242+
243+
VK_OBJS += $(VK_OBJS_FAST) $(VK_OBJS_SLOW)
217244
endif
218245

219246
# When archiving just objects (.o), use single $(AR) run
@@ -341,6 +368,7 @@ debug-make::
341368
@echo VM_GLOBAL_FAST: $(VM_GLOBAL_FAST)
342369
@echo VM_GLOBAL_SLOW: $(VM_GLOBAL_SLOW)
343370
@echo VM_PARALLEL_BUILDS: $(VM_PARALLEL_BUILDS)
371+
@echo VM_PARALLEL_JOBS: $(VM_PARALLEL_JOBS)
344372
@echo VM_PREFIX: $(VM_PREFIX)
345373
@echo VM_SUPPORT_FAST: $(VM_SUPPORT_FAST)
346374
@echo VM_SUPPORT_SLOW: $(VM_SUPPORT_SLOW)

nodist/gen_group_scores

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/usr/bin/env python3
2+
# pylint: disable=C0114,C0116
3+
#
4+
# Copyright 2022-2024 by Wilson Snyder. Verilator is free software; you
5+
# can redistribute it and/or modify it under the terms of either the GNU Lesser
6+
# General Public License Version 3 or the Apache License 2.0.
7+
# SPDX-License-Identifier: LGPL-3.0-only OR Apache-2.0
8+
#
9+
# Usage Example:
10+
#
11+
# ./nodist/gen_group_scores \
12+
# 200x15-100 2x999 16x50 2x999 100x50-150 500 4-33 8x0-999 0-7 \
13+
# > test_files_list.txt
14+
# ./bin/verilator --debugi-V3EmitMk 6 --output-groups 10 \
15+
# --debug-test-concatenation ./test_files_list.txt
16+
17+
import argparse
18+
import random
19+
import re
20+
import sys
21+
22+
23+
def parse_sequence_params(sequence_params: str) -> tuple[int, int, int] | None:
24+
m = re.match(r"^(?:([1-9][0-9]*)x)?(0|[1-9][0-9]*)(?:-(0|[1-9][0-9]*))?$",
25+
sequence_params)
26+
27+
repeat = 1
28+
min_score = 1
29+
max_score = 1
30+
31+
if not m:
32+
return None
33+
34+
if m.group(1) is not None:
35+
repeat = int(m.group(1))
36+
37+
min_score = int(m.group(2))
38+
max_score = int(m.group(2))
39+
40+
if m.group(3) is not None:
41+
max_score = int(m.group(3))
42+
43+
if max_score < min_score:
44+
min_score, max_score = max_score, min_score
45+
46+
return (repeat, min_score, max_score)
47+
48+
49+
def gen_sequence_scores(repeat: int, min_score: int,
                        max_score: int) -> list[int]:
    """Generate ``repeat`` scores, each within ``[min_score, max_score]``.

    Args:
        repeat: Number of scores to produce.
        min_score: Lowest score that may be produced.
        max_score: Highest score that may be produced (inclusive, as the
            command line help states).

    Returns:
        A list of ``repeat`` integers. When ``min_score == max_score`` every
        entry is that value and the random generator is not consulted.
    """
    if min_score == max_score:
        return [min_score] * repeat
    # randint() includes both end points; randrange(min, max) would never
    # return max_score.
    return [random.randint(min_score, max_score) for _ in range(repeat)]
58+
59+
60+
def main():
    """Print a generated list of ``<score>\\t<dummy file name>`` lines.

    Every positional argument is a sequence specification understood by
    parse_sequence_params(); an invalid one terminates the program through
    ``parser.error``. One line is printed per generated score.

    Returns:
        0 on success.
    """
    epilog_text = ("Copyright 2024 by Wilson Snyder. Verilator is free software;\n"
                   "you can redistribute it and/or modify it under the terms of either the GNU\n"
                   "Lesser General Public License Version 3 or the Apache License 2.0.\n"
                   "SPDX-License-Identifier: LGPL-3.0-only OR Apache-2.0\n")

    help_text = ("Defines how to generate a sequence of entries. Syntax:\n"
                 "\n"
                 " [{repeat}x]{score}\n"
                 " [{repeat}x]{min_score}-{max_score}\n"
                 "\n"
                 "Where:\n"
                 " repeat number of entries in the list (1 by default)\n"
                 " score use `score` for each entry\n"
                 " min_score/max_score use random score between min and max\n"
                 " for each entry\n"
                 "\n"
                 "Examples:\n"
                 " 120x5-80 120 entries, each with a random score between 5 and 80\n"
                 " (inclusive) each.\n"
                 " 1000-2000 1 entry with a random score between 1000 and 2000\n"
                 " (inclusive).\n"
                 " 10x24 10 entries, each with score 24.\n"
                 " 9000 1 entry with score 9000.\n")

    parser = argparse.ArgumentParser(
        allow_abbrev=False,
        formatter_class=argparse.RawTextHelpFormatter,
        description="Generates input list for .cpp file concatenation test",
        epilog=epilog_text)
    parser.add_argument("sequence_params", type=str, nargs="+", help=help_text)

    cmdline = parser.parse_args()

    # Validate every specification up front so nothing is printed when any
    # of them is malformed.
    all_sequence_params: list[tuple[str, int, int, int]] = []
    for params_str in cmdline.sequence_params:
        parsed = parse_sequence_params(params_str)
        if parsed is None:
            # Exit with error message
            parser.error(f"Invalid sequence_params value: {params_str}")
        repeat, min_score, max_score = parsed
        all_sequence_params.append((params_str, repeat, min_score, max_score))

    # Running index across all sequences, used in the dummy file name.
    abs_ent_idx = 0
    for seq_idx, entry in enumerate(all_sequence_params):
        params_str, repeat, min_score, max_score = entry
        scores = gen_sequence_scores(repeat, min_score, max_score)
        for seq_ent_idx, score in enumerate(scores):
            line = (f"{score:>12}\t"
                    f"dummy_cpp_file_{abs_ent_idx:04}"
                    f".seq_{seq_idx:02}_{params_str}_{seq_ent_idx}"
                    f".score_{score}")
            print(line)
            abs_ent_idx += 1

    return 0


if __name__ == "__main__":
    sys.exit(main())

src/V3AstNodeOther.h

+4
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,7 @@ class AstTask final : public AstNodeFTask {
22622262
class AstCFile final : public AstNodeFile {
22632263
// C++ output file
22642264
// Parents: NETLIST
2265+
int64_t m_complexityScore = 0;
22652266
bool m_slow : 1; ///< Compile w/o optimization
22662267
bool m_source : 1; ///< Source file (vs header file)
22672268
bool m_support : 1; ///< Support file (non systemc)
@@ -2274,6 +2275,9 @@ class AstCFile final : public AstNodeFile {
22742275
ASTGEN_MEMBERS_AstCFile;
22752276
void dump(std::ostream& str = std::cout) const override;
22762277
void dumpJson(std::ostream& str = std::cout) const override;
2278+
// Returns the complexity score currently stored in m_complexityScore.
int64_t complexityScore() const { return m_complexityScore; }
2279+
// Overwrites the stored complexity score with `newScore`.
void complexityScore(int64_t newScore) { m_complexityScore = newScore; }
2280+
// Adds `score` to the stored complexity score.
void increaseComplexityScore(int64_t score) { m_complexityScore += score; }
22772281
bool slow() const { return m_slow; }
22782282
void slow(bool flag) { m_slow = flag; }
22792283
bool source() const { return m_source; }

src/V3EmitCBase.h

+31
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,40 @@ class EmitCBaseVisitorConst VL_NOT_FINAL : public VNVisitorConst, public EmitCBa
8585
public:
8686
// STATE
8787
V3OutCFile* m_ofp = nullptr;
88+
AstCFile* m_outFileNodep = nullptr;
8889
bool m_trackText = false; // Always track AstText nodes
8990
// METHODS
91+
// Adds `score` to the complexity score of the AstCFile held in m_outFileNodep.
// Requires that m_outFileNodep is non-null.
// NOTE(review): uses plain assert() while other checks in this class use UASSERT;
// with NDEBUG defined the check disappears - confirm this is intended.
void increaseComplexityScore(int64_t score) {
92+
assert(m_outFileNodep);
93+
m_outFileNodep->increaseComplexityScore(score);
94+
}
95+
96+
// Returns pointer to current output file object.
9097
V3OutCFile* ofp() const VL_MT_SAFE { return m_ofp; }
98+
// Returns pointer to the AST node that represents the output file (`ofp()`)
99+
AstCFile* outFileNodep() const VL_MT_SAFE { return m_outFileNodep; }
100+
101+
// Sets ofp() and outFileNodep() to the given pointers, without closing a file these pointers
102+
// currently point to.
103+
// Stores both pointers as given; the previously stored values are simply overwritten.
void setOutputFile(V3OutCFile* ofp, AstCFile* nodep) {
104+
m_ofp = ofp;
105+
m_outFileNodep = nodep;
106+
}
107+
108+
// Sets ofp() and outFileNodep() to null, without closing a file these pointers currently point
109+
// to. NOTE: Dummy nullptr argument is taken to make function calls more explicit.
110+
// Overload selected when the caller passes a literal nullptr; resets both pointers.
// NOTE(review): a std::nullptr_t value always compares equal to nullptr, so the
// UASSERT condition below can never be false.
void setOutputFile(std::nullptr_t nullp) {
111+
UASSERT(nullp == nullptr, "Expected nullptr as the argument");
112+
m_ofp = nullp;
113+
m_outFileNodep = nullp;
114+
}
115+
116+
// Closes current output file. Sets ofp() and outFileNodep() to nullptr.
117+
// Deletes the V3OutCFile held in m_ofp and resets m_ofp and m_outFileNodep to nullptr.
// The AstCFile node itself is not deleted here.
void closeOutputFile() {
118+
// Presumably VL_DO_CLEAR runs the delete and then the reset statement - see its definition.
VL_DO_CLEAR(delete m_ofp, m_ofp = nullptr);
119+
m_outFileNodep = nullptr;
120+
}
121+
91122
void puts(const string& str) { ofp()->puts(str); }
92123
void putns(const AstNode* nodep, const string& str) { ofp()->putns(nodep, str); }
93124
void putsHeader() { ofp()->putsHeader(); }

src/V3EmitCConstInit.h

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class EmitCConstInit VL_NOT_FINAL : public EmitCBaseVisitorConst {
4848
VL_RESTORER(m_inUnpacked);
4949
VL_RESTORER(m_unpackedWord);
5050
m_inUnpacked = true;
51+
increaseComplexityScore(1);
5152
if (VN_IS(nodep->dtypep()->skipRefp(), AssocArrayDType)) {
5253
// Note the double {{ initializer. The first { starts the initializer of the
5354
// VlUnpacked, and the second starts the initializer of m_storage within the
@@ -99,6 +100,7 @@ class EmitCConstInit VL_NOT_FINAL : public EmitCBaseVisitorConst {
99100
void visit(AstConst* nodep) override {
100101
const V3Number& num = nodep->num();
101102
UASSERT_OBJ(!num.isFourState(), nodep, "4-state value in constant pool");
103+
increaseComplexityScore(1);
102104
const AstNodeDType* const dtypep = nodep->dtypep();
103105
if (num.isNull()) {
104106
putns(nodep, "VlNull{}");

0 commit comments

Comments
 (0)