Skip to content

Commit 02e6130

Browse files
committed
chore: iavl/v2 alpha6
1 parent 0bce70d commit 02e6130

37 files changed

+3788
-2411
lines changed

v2/README.md

+62-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,62 @@
1-
# IAVL v2
1+
# iavl/v2
2+
3+
IAVL v2 is a performance-minded rewrite of IAVL v1. Benchmarks show a 10-20x improvement in
4+
throughput depending on the operation. The primary changes are:
5+
6+
- Checkpoints: periodic writes of dirty branch nodes to disk.
7+
- Leaf changelog: leaf nodes are flushed to disk at every version.
8+
- Replay: revert the tree to a previous version by replaying the leaf changelog.
9+
- Sharding: shards are created on pruning events.
10+
- BTree on disk: SQLite (a mature BTree implementation) is used for storage.
11+
- Cache: the AVL tree is cached in memory and (non-dirty) nodes evicted by configurable policy.
12+
13+
## Concepts
14+
15+
### Checkpoints
16+
17+
A checkpoint writes all dirty branch nodes currently in memory since the last checkpoint to
18+
disk. Checkpoints are distinct from shards. One shard may contain multiple checkpoints. A checkpoint occurs
19+
at a configurable interval or when the dirty branch nodes exceed a threshold.
20+
21+
### Leaf Changelog
22+
23+
The leaf changelog is a list of leaf nodes that have been written since the last checkpoint. Inserts and
24+
updates are in one table, deletes in another. They are ordered by a sequence number per version to allow for
25+
deterministic replay. This also makes it possible to evict leaves from the tree and rely on SQLite's
26+
page cache and memory map to manage efficient access for leaves.
27+
28+
### Replay
29+
30+
Replay is the process of reverting the tree to a previous version. Given a version v, the tree is loaded at
31+
the checkpoint version m less than or equal to v. The leaf changelog is replayed from m to v. The tree is now at
32+
version v.
33+
34+
This is useful for rolling back, or querying and proving the state of the tree at a previous version.
35+
36+
### Sharding
37+
38+
A shard contains all the changes to a tree from version m to version n. It may contain multiple checkpoints.
39+
40+
### BTree (SQLite)
41+
42+
Why SQLite? A B+Tree is a very efficient on disk data structure. The ideal implementation of IAVL on disk
43+
would be to lay out nodes in subtree chunks in the same format as the in-memory AVL tree. A B+Tree is
44+
as close an approximation to this as possible.
45+
46+
## Pruning
47+
48+
Parameters:
49+
50+
- invalidated ratio: the ratio of invalidated nodes to total nodes in a shard that triggers a
51+
pruning event. The default is 1.5. This roughly correlates to the disk size of a complete tree, where (2 * ratio) is the size of the pre-pruned tree on disk. A ratio of 1.5 means that 3x the initial size should be provisioned.
52+
- minimum keep versions: the minimum number of versions to keep. This is a safety feature to
53+
prevent pruning to a version that is too recent. The default is 100.
54+
55+
Pruning events only occur on checkpoint boundaries. The prune version is the most recent
56+
checkpoint less than or equal to the requested prune version.
57+
58+
On prune, the latest shard is locked (read-only) and a new shard is created. The new shard is now
59+
the hot shard and subsequent SaveVersion calls write leaves and branches to it.
60+
61+
Deletes happen by writing a new shard without orphans, updating the shard connection, then
62+
dropping the old one.

v2/cmd/bench/bench.go

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package bench
2+
3+
import (
4+
"net/http"
5+
"testing"
6+
"time"
7+
8+
"github.com/cosmos/iavl/v2"
9+
"github.com/cosmos/iavl/v2/metrics"
10+
"github.com/cosmos/iavl/v2/testutil"
11+
"github.com/prometheus/client_golang/prometheus"
12+
"github.com/prometheus/client_golang/prometheus/promauto"
13+
"github.com/prometheus/client_golang/prometheus/promhttp"
14+
"github.com/spf13/cobra"
15+
"github.com/stretchr/testify/require"
16+
)
17+
18+
func Command() *cobra.Command {
19+
cmd := &cobra.Command{
20+
Use: "bench",
21+
Short: "run benchmarks",
22+
}
23+
cmd.AddCommand(benchCommand())
24+
return cmd
25+
}
26+
27+
func benchCommand() *cobra.Command {
28+
var (
29+
dbPath string
30+
changelogPath string
31+
loadSnapshot bool
32+
usePrometheus bool
33+
)
34+
cmd := &cobra.Command{
35+
Use: "std",
36+
Short: "run the std development benchmark",
37+
Long: `Runs a longer benchmark for the IAVL tree. This is useful for development and testing.
38+
Pre-requisites this command:
39+
$ go run ./cmd gen tree --db /tmp/iavl-v2 --limit 1 --type osmo-like-many
40+
mkdir -p /tmp/osmo-like-many/v2 && go run ./cmd gen emit --start 2 --limit 1000 --type osmo-like-many --out /tmp/osmo-like-many/v2
41+
42+
Optional for --snapshot arg:
43+
$ go run ./cmd snapshot --db /tmp/iavl-v2 --version 1
44+
`,
45+
46+
RunE: func(_ *cobra.Command, _ []string) error {
47+
t := &testing.T{}
48+
treeOpts := iavl.DefaultTreeOptions()
49+
treeOpts.CheckpointInterval = 80
50+
treeOpts.StateStorage = true
51+
treeOpts.HeightFilter = 1
52+
treeOpts.EvictionDepth = 22
53+
treeOpts.MetricsProxy = metrics.NewStructMetrics()
54+
if usePrometheus {
55+
treeOpts.MetricsProxy = newPrometheusMetricsProxy()
56+
}
57+
58+
var multiTree *iavl.MultiTree
59+
if loadSnapshot {
60+
pool := iavl.NewNodePool()
61+
var err error
62+
multiTree, err = iavl.ImportMultiTree(pool, 1, dbPath, treeOpts)
63+
require.NoError(t, err)
64+
} else {
65+
multiTree = iavl.NewMultiTree(dbPath, treeOpts)
66+
require.NoError(t, multiTree.MountTrees())
67+
require.NoError(t, multiTree.LoadVersion(1))
68+
require.NoError(t, multiTree.WarmLeaves())
69+
}
70+
71+
opts := testutil.CompactedChangelogs(changelogPath)
72+
opts.SampleRate = 250_000
73+
74+
// opts.Until = 1_000
75+
// opts.UntilHash = "557663181d9ab97882ecfc6538e3b4cfe31cd805222fae905c4b4f4403ca5cda"
76+
opts.Until = 500
77+
opts.UntilHash = "2670bd5767e70f2bf9e4f723b5f205759e39afdb5d8cfb6b54a4a3ecc27a1377"
78+
79+
multiTree.TestBuild(t, opts)
80+
return nil
81+
},
82+
}
83+
cmd.Flags().StringVar(&dbPath, "db", "/tmp/iavl-v2", "the path to the database at version 1")
84+
cmd.Flags().StringVar(&changelogPath, "changelog", "/tmp/osmo-like-many/v2", "the path to the changelog")
85+
cmd.Flags().BoolVar(&loadSnapshot, "snapshot", false, "load the snapshot at version 1 before running the benchmarks (loads full tree into memory)")
86+
cmd.Flags().BoolVar(&usePrometheus, "prometheus", false, "enable prometheus metrics")
87+
88+
if err := cmd.MarkFlagRequired("changelog"); err != nil {
89+
panic(err)
90+
}
91+
if err := cmd.MarkFlagRequired("db"); err != nil {
92+
panic(err)
93+
}
94+
return cmd
95+
}
96+
97+
var _ metrics.Proxy = &prometheusMetricsProxy{}
98+
99+
type prometheusMetricsProxy struct {
100+
workingSize prometheus.Gauge
101+
workingBytes prometheus.Gauge
102+
}
103+
104+
func newPrometheusMetricsProxy() *prometheusMetricsProxy {
105+
p := &prometheusMetricsProxy{}
106+
p.workingSize = promauto.NewGauge(prometheus.GaugeOpts{
107+
Name: "iavl_working_size",
108+
Help: "working size",
109+
})
110+
p.workingBytes = promauto.NewGauge(prometheus.GaugeOpts{
111+
Name: "iavl_working_bytes",
112+
Help: "working bytes",
113+
})
114+
http.Handle("/metrics", promhttp.Handler())
115+
go func() {
116+
err := http.ListenAndServe(":2112", nil)
117+
if err != nil {
118+
panic(err)
119+
}
120+
}()
121+
return p
122+
}
123+
124+
func (p *prometheusMetricsProxy) IncrCounter(_ float32, _ ...string) {
125+
}
126+
127+
func (p *prometheusMetricsProxy) SetGauge(val float32, keys ...string) {
128+
k := keys[1]
129+
switch k {
130+
case "working_size":
131+
p.workingSize.Set(float64(val))
132+
case "working_bytes":
133+
p.workingBytes.Set(float64(val))
134+
}
135+
}
136+
137+
// MeasureSince is a no-op; this proxy does not record timings.
func (p *prometheusMetricsProxy) MeasureSince(_ time.Time, _ ...string) {
}

v2/cmd/gen/gen.go

+19-13
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,15 @@ import (
1212
"github.com/dustin/go-humanize"
1313
"github.com/kocubinski/costor-api/compact"
1414
"github.com/kocubinski/costor-api/core"
15+
"github.com/rs/zerolog"
16+
zlog "github.com/rs/zerolog/log"
1517
"github.com/spf13/cobra"
1618
)
1719

18-
var log = iavl.NewTestLogger()
20+
var log = zlog.Output(zerolog.ConsoleWriter{
21+
Out: os.Stderr,
22+
TimeFormat: time.Stamp,
23+
})
1924

2025
func Command() *cobra.Command {
2126
cmd := &cobra.Command{
@@ -51,7 +56,7 @@ func emitCommand() *cobra.Command {
5156
cmd := &cobra.Command{
5257
Use: "emit",
5358
Short: "emit generated changesets to disk",
54-
RunE: func(cmd *cobra.Command, args []string) error {
59+
RunE: func(cmd *cobra.Command, _ []string) error {
5560
itr, err := getChangesetIterator(typ)
5661
if err != nil {
5762
return err
@@ -70,10 +75,9 @@ func emitCommand() *cobra.Command {
7075
go func() {
7176
stats, err := stream.Compact()
7277
if err != nil {
73-
log.Error("failed to compact", "error", err)
74-
os.Exit(1)
78+
log.Fatal().Err(err).Msg("failed to compact")
7579
}
76-
log.Info(stats.Report())
80+
log.Info().Msg(stats.Report())
7781
wg.Done()
7882
}()
7983

@@ -91,13 +95,13 @@ func emitCommand() *cobra.Command {
9195

9296
if itr.Version() < int64(start) {
9397
if cnt%5_000_000 == 0 {
94-
log.Info(fmt.Sprintf("fast forward version=%d nodes=%s", itr.Version(), humanize.Comma(cnt)))
98+
log.Info().Msgf("fast forward version=%d nodes=%s", itr.Version(), humanize.Comma(cnt))
9599
}
96100
continue
97101
}
98102

99103
if cnt%500_000 == 0 {
100-
log.Info(fmt.Sprintf("version=%d nodes=%s", itr.Version(), humanize.Comma(cnt)))
104+
log.Info().Msgf("version=%d nodes=%s", itr.Version(), humanize.Comma(cnt))
101105
}
102106

103107
select {
@@ -144,12 +148,12 @@ func treeCommand() *cobra.Command {
144148
cmd := &cobra.Command{
145149
Use: "tree",
146150
Short: "build and save a Tree to disk, taking generated changesets as input",
147-
RunE: func(cmd *cobra.Command, args []string) error {
148-
multiTree := iavl.NewMultiTree(iavl.NewTestLogger(), dbPath, iavl.TreeOptions{StateStorage: true})
151+
RunE: func(_ *cobra.Command, _ []string) error {
152+
multiTree := iavl.NewMultiTree(dbPath, iavl.TreeOptions{StateStorage: true})
149153
defer func(mt *iavl.MultiTree) {
150154
err := mt.Close()
151155
if err != nil {
152-
log.Error("failed to close db", "error", err)
156+
log.Error().Err(err).Msg("failed to close db")
153157
}
154158
}(multiTree)
155159

@@ -199,12 +203,12 @@ func treeCommand() *cobra.Command {
199203

200204
i++
201205
if i%100_000 == 0 {
202-
log.Info(fmt.Sprintf("leaves=%s dur=%s rate=%s version=%d",
206+
log.Info().Msgf("leaves=%s dur=%s rate=%s version=%d",
203207
humanize.Comma(i),
204208
time.Since(start),
205209
humanize.Comma(int64(100_000/time.Since(start).Seconds())),
206210
itr.Version(),
207-
))
211+
)
208212
start = time.Now()
209213
}
210214
}
@@ -215,7 +219,7 @@ func treeCommand() *cobra.Command {
215219
}
216220
}
217221

218-
log.Info(fmt.Sprintf("last version=%d hash=%x", lastVersion, lastHash))
222+
log.Info().Msgf("last version=%d hash=%x", lastVersion, lastHash)
219223

220224
return nil
221225
},
@@ -228,3 +232,5 @@ func treeCommand() *cobra.Command {
228232
cmd.Flags().Int64Var(&limit, "limit", -1, "the version (inclusive) to halt generation at. -1 means no limit")
229233
return cmd
230234
}
235+
236+
// pre-requisites this command

v2/cmd/rollback/rollback.go

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
package rollback
22

33
import (
4-
"fmt"
4+
"os"
5+
"time"
56

67
"github.com/cosmos/iavl/v2"
8+
"github.com/rs/zerolog"
9+
zlog "github.com/rs/zerolog/log"
710
"github.com/spf13/cobra"
811
)
912

10-
var log = iavl.NewTestLogger()
13+
var log = zlog.Output(zerolog.ConsoleWriter{
14+
Out: os.Stderr,
15+
TimeFormat: time.Stamp,
16+
})
1117

1218
func Command() *cobra.Command {
1319
var (
@@ -17,13 +23,13 @@ func Command() *cobra.Command {
1723
cmd := &cobra.Command{
1824
Use: "rollback",
1925
Short: "Rollback IAVL to a previous version",
20-
RunE: func(cmd *cobra.Command, args []string) error {
26+
RunE: func(_ *cobra.Command, _ []string) error {
2127
dbPaths, err := iavl.FindDbsInPath(path)
2228
if err != nil {
2329
return err
2430
}
2531
for _, dbPath := range dbPaths {
26-
log.Info(fmt.Sprintf("revert db %s to version %d", dbPath, version))
32+
log.Info().Msgf("revert db %s to version %d", dbPath, version)
2733
sql, err := iavl.NewSqliteDb(iavl.NewNodePool(), iavl.SqliteDbOptions{Path: dbPath})
2834
if err != nil {
2935
return err

v2/cmd/root.go

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package main
22

33
import (
4+
"github.com/cosmos/iavl/v2/cmd/bench"
45
"github.com/cosmos/iavl/v2/cmd/gen"
56
"github.com/cosmos/iavl/v2/cmd/rollback"
67
"github.com/cosmos/iavl/v2/cmd/scan"
@@ -18,6 +19,7 @@ func RootCommand() (*cobra.Command, error) {
1819
snapshot.Command(),
1920
rollback.Command(),
2021
scan.Command(),
22+
bench.Command(),
2123
latestCommand(),
2224
)
2325
return cmd, nil

v2/cmd/scan/scan.go

+8-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func probeCommand() *cobra.Command {
2121
cmd := &cobra.Command{
2222
Use: "probe",
2323
Short: "prob sqlite cgo configuration",
24-
RunE: func(cmd *cobra.Command, args []string) error {
24+
RunE: func(_ *cobra.Command, _ []string) error {
2525
f, err := os.CreateTemp("", "iavl-v2-probe.sqlite")
2626
if err != nil {
2727
return err
@@ -82,7 +82,7 @@ func rootsCommand() *cobra.Command {
8282
cmd := &cobra.Command{
8383
Use: "roots",
8484
Short: "list roots",
85-
RunE: func(cmd *cobra.Command, args []string) error {
85+
RunE: func(_ *cobra.Command, _ []string) error {
8686
sql, err := iavl.NewSqliteDb(iavl.NewNodePool(), iavl.SqliteDbOptions{Path: dbPath})
8787
if err != nil {
8888
return err
@@ -97,7 +97,11 @@ func rootsCommand() *cobra.Command {
9797
}
9898
cmd.Flags().StringVar(&dbPath, "db", "", "path to sqlite db")
9999
cmd.Flags().Int64Var(&version, "version", 0, "version to query")
100-
cmd.MarkFlagRequired("db")
101-
cmd.MarkFlagRequired("version")
100+
if err := cmd.MarkFlagRequired("db"); err != nil {
101+
panic(err)
102+
}
103+
if err := cmd.MarkFlagRequired("version"); err != nil {
104+
panic(err)
105+
}
102106
return cmd
103107
}

0 commit comments

Comments
 (0)