From a2d5edc4d11dee5440e07e50737985254f1ee912 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Thu, 5 Dec 2019 20:01:21 +0100 Subject: [PATCH 01/23] storage/localstore: integrate fcds --- storage/fcds/fcds.go | 305 +++++++++++++++++ storage/fcds/leveldb/leveldb.go | 161 +++++++++ storage/fcds/leveldb/leveldb_test.go | 43 +++ storage/fcds/mem/mem.go | 112 +++++++ storage/fcds/mem/mem_test.go | 41 +++ storage/fcds/meta.go | 59 ++++ storage/fcds/mock/mock.go | 113 +++++++ storage/fcds/mock/mock_test.go | 41 +++ storage/fcds/offsetcache.go | 56 ++++ storage/fcds/test/store.go | 332 +++++++++++++++++++ storage/localstore/export.go | 13 +- storage/localstore/gc.go | 25 +- storage/localstore/localstore.go | 101 +++--- storage/localstore/localstore_test.go | 52 +-- storage/localstore/mode_get.go | 15 +- storage/localstore/mode_get_multi.go | 11 +- storage/localstore/mode_has.go | 15 +- storage/localstore/mode_put.go | 56 ++-- storage/localstore/mode_put_test.go | 54 +-- storage/localstore/mode_set.go | 68 ++-- storage/localstore/mode_set_test.go | 11 +- storage/localstore/subscription_pull_test.go | 2 +- storage/localstore/subscription_push.go | 4 +- 23 files changed, 1472 insertions(+), 218 deletions(-) create mode 100644 storage/fcds/fcds.go create mode 100644 storage/fcds/leveldb/leveldb.go create mode 100644 storage/fcds/leveldb/leveldb_test.go create mode 100644 storage/fcds/mem/mem.go create mode 100644 storage/fcds/mem/mem_test.go create mode 100644 storage/fcds/meta.go create mode 100644 storage/fcds/mock/mock.go create mode 100644 storage/fcds/mock/mock_test.go create mode 100644 storage/fcds/offsetcache.go create mode 100644 storage/fcds/test/store.go diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go new file mode 100644 index 0000000000..f960506498 --- /dev/null +++ b/storage/fcds/fcds.go @@ -0,0 +1,305 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . 
+ +package fcds + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "time" + + "github.com/ethersphere/swarm/chunk" +) + +const shardCount = 32 + +var ErrDBClosed = errors.New("closed database") + +type Interface interface { + Get(addr chunk.Address) (ch chunk.Chunk, err error) + Has(addr chunk.Address) (yes bool, err error) + Put(ch chunk.Chunk) (err error) + Delete(addr chunk.Address) (err error) + Count() (count int, err error) + Iterate(func(ch chunk.Chunk) (stop bool, err error)) (err error) + Close() (err error) +} + +var _ Interface = new(Store) + +type Store struct { + shards map[uint8]*os.File + shardsMu map[uint8]*sync.Mutex + meta MetaStore + free map[uint8]struct{} + freeMu sync.RWMutex + freeCache *offsetCache + wg sync.WaitGroup + maxChunkSize int + quit chan struct{} + quitOnce sync.Once +} + +func NewStore(path string, maxChunkSize int, metaStore MetaStore, noCache bool) (s *Store, err error) { + shards := make(map[byte]*os.File, shardCount) + shardsMu := make(map[uint8]*sync.Mutex) + for i := byte(0); i < shardCount; i++ { + shards[i], err = os.OpenFile(filepath.Join(path, fmt.Sprintf("chunks-%v.db", i)), os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return nil, err + } + shardsMu[i] = new(sync.Mutex) + } + var ( + freeCache *offsetCache + ) + if !noCache { + freeCache = newOffsetCache(shardCount) + } + return &Store{ + shards: shards, + shardsMu: shardsMu, + meta: metaStore, + freeCache: freeCache, + free: make(map[uint8]struct{}), + maxChunkSize: maxChunkSize, + quit: make(chan struct{}), + }, nil +} + +func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { + done, err := s.protect() + if err != nil { + return nil, err + } + defer done() + + mu := s.shardsMu[getShard(addr)] + mu.Lock() + defer mu.Unlock() + + m, err := s.getMeta(addr) + if err != nil { + return nil, err + } + data := make([]byte, m.Size) + n, err := s.shards[getShard(addr)].ReadAt(data, m.Offset) + if err != nil && err != io.EOF { + return nil, err + } + if n != int(m.Size) { + return nil, fmt.Errorf("incomplete chunk data, read %v of %v", n, m.Size) + } + return chunk.NewChunk(addr, data), nil +} + +func (s *Store) Has(addr chunk.Address) (yes bool, err error) { + done, err := s.protect() + if err != nil { + return false, err + } + defer done() + + mu := s.shardsMu[getShard(addr)] + mu.Lock() + defer mu.Unlock() + + _, err = s.getMeta(addr) + if err != nil { + if err == chunk.ErrChunkNotFound { + return false, nil + } + return false, err + } + return true, nil +} + +func (s *Store) Put(ch chunk.Chunk) (err error) { + done, err := s.protect() + if err != nil { + return err + } + defer done() + + addr := ch.Address() + shard := getShard(addr) + f := s.shards[shard] + data := ch.Data() + section := make([]byte, s.maxChunkSize) + copy(section, data) + + s.freeMu.RLock() + _, hasFree := s.free[shard] + s.freeMu.RUnlock() + + var offset int64 + var reclaimed bool + mu := s.shardsMu[shard] + mu.Lock() + if hasFree { + var freeOffset int64 = -1 + if s.freeCache != nil { + freeOffset = s.freeCache.get(shard) + } + if freeOffset < 0 { + freeOffset, err = s.meta.FreeOffset(shard) + if err != nil { + return err + } + } + if freeOffset < 0 { + offset, err = f.Seek(0, io.SeekEnd) + if err != nil { + mu.Unlock() + return err + } + s.freeMu.Lock() + delete(s.free, shard) + s.freeMu.Unlock() + } else { + offset, err = f.Seek(freeOffset, io.SeekStart) + if err != nil { + mu.Unlock() + return err + } + reclaimed = true + } + } else { + offset, err = f.Seek(0, io.SeekEnd) + if err != 
nil { + mu.Unlock() + return err + } + } + _, err = f.Write(section) + if err != nil { + mu.Unlock() + return err + } + if reclaimed { + if s.freeCache != nil { + s.freeCache.remove(shard, offset) + } + defer mu.Unlock() + } else { + mu.Unlock() + } + return s.meta.Set(addr, shard, reclaimed, &Meta{ + Size: uint16(len(data)), + Offset: offset, + }) +} + +func (s *Store) Delete(addr chunk.Address) (err error) { + done, err := s.protect() + if err != nil { + return err + } + defer done() + + shard := getShard(addr) + s.freeMu.Lock() + s.free[shard] = struct{}{} + s.freeMu.Unlock() + + mu := s.shardsMu[shard] + mu.Lock() + defer mu.Unlock() + + if s.freeCache != nil { + m, err := s.getMeta(addr) + if err != nil { + return err + } + s.freeCache.set(shard, m.Offset) + } + return s.meta.Remove(addr, shard) +} + +func (s *Store) Count() (count int, err error) { + return s.meta.Count() +} + +func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) { + done, err := s.protect() + if err != nil { + return err + } + defer done() + + for _, mu := range s.shardsMu { + mu.Lock() + } + defer func() { + for _, mu := range s.shardsMu { + mu.Unlock() + } + }() + + return s.meta.Iterate(func(addr chunk.Address, m *Meta) (stop bool, err error) { + data := make([]byte, m.Size) + _, err = s.shards[getShard(addr)].ReadAt(data, m.Offset) + if err != nil { + return true, err + } + return fn(chunk.NewChunk(addr, data)) + }) +} + +func (s *Store) Close() (err error) { + s.quitOnce.Do(func() { + close(s.quit) + }) + + done := make(chan struct{}) + go func() { + s.wg.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(15 * time.Second): + } + + for _, f := range s.shards { + if err := f.Close(); err != nil { + return err + } + } + return s.meta.Close() +} + +func (s *Store) protect() (done func(), err error) { + select { + case <-s.quit: + return nil, ErrDBClosed + default: + } + s.wg.Add(1) + return s.wg.Done, nil +} + +func (s *Store) getMeta(addr chunk.Address) (m *Meta, err error) { + return s.meta.Get(addr) +} + +func getShard(addr chunk.Address) (shard uint8) { + return addr[len(addr)-1] % shardCount +} diff --git a/storage/fcds/leveldb/leveldb.go b/storage/fcds/leveldb/leveldb.go new file mode 100644 index 0000000000..637eac5715 --- /dev/null +++ b/storage/fcds/leveldb/leveldb.go @@ -0,0 +1,161 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . 
+ +package leveldb + +import ( + "encoding/binary" + + "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/storage/fcds" + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +var _ fcds.MetaStore = new(MetaStore) + +type MetaStore struct { + db *leveldb.DB +} + +func NewMetaStore(filename string) (s *MetaStore, err error) { + db, err := leveldb.OpenFile(filename, &opt.Options{}) + if err != nil { + return nil, err + } + return &MetaStore{ + db: db, + }, err +} + +func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { + data, err := s.db.Get(chunkKey(addr), nil) + if err != nil { + if err == leveldb.ErrNotFound { + return nil, chunk.ErrChunkNotFound + } + return nil, err + } + m = new(fcds.Meta) + if err := m.UnmarshalBinary(data); err != nil { + return nil, err + } + return m, nil +} + +func (s *MetaStore) Set(addr chunk.Address, shard uint8, reclaimed bool, m *fcds.Meta) (err error) { + batch := new(leveldb.Batch) + if reclaimed { + batch.Delete(freeKey(shard, m.Offset)) + } + meta, err := m.MarshalBinary() + if err != nil { + return err + } + batch.Put(chunkKey(addr), meta) + return s.db.Write(batch, nil) +} + +func (s *MetaStore) FreeOffset(shard uint8) (offset int64, err error) { + i := s.db.NewIterator(nil, nil) + defer i.Release() + + i.Seek([]byte{freePrefix, shard}) + key := i.Key() + if key == nil || key[0] != freePrefix || key[1] != shard { + return -1, nil + } + offset = int64(binary.BigEndian.Uint64(key[2:10])) + return offset, nil +} + +func (s *MetaStore) Remove(addr chunk.Address, shard uint8) (err error) { + m, err := s.Get(addr) + if err != nil { + return err + } + batch := new(leveldb.Batch) + batch.Put(freeKey(shard, m.Offset), nil) + batch.Delete(chunkKey(addr)) + return s.db.Write(batch, nil) +} + +func (s *MetaStore) Count() (count int, err error) { + it := s.db.NewIterator(nil, nil) + defer it.Release() + + for ok := it.First(); ok; ok = it.Next() { + value := it.Value() + if len(value) == 0 { + continue + } + key := it.Key() + if len(key) < 1 { + continue + } + count++ + } + return count, it.Error() +} + +func (s *MetaStore) Iterate(fn func(chunk.Address, *fcds.Meta) (stop bool, err error)) (err error) { + it := s.db.NewIterator(nil, nil) + defer it.Release() + + for ok := it.First(); ok; ok = it.Next() { + value := it.Value() + if len(value) == 0 { + continue + } + key := it.Key() + if len(key) < 1 { + continue + } + m := new(fcds.Meta) + if err := m.UnmarshalBinary(value); err != nil { + return err + } + stop, err := fn(chunk.Address(key[1:]), m) + if err != nil { + return err + } + if stop { + return nil + } + } + return it.Error() +} + +func (s *MetaStore) Close() (err error) { + return s.db.Close() +} + +const ( + chunkPrefix = 0 + freePrefix = 1 +) + +func chunkKey(addr chunk.Address) (key []byte) { + return append([]byte{chunkPrefix}, addr...) +} + +func freeKey(shard uint8, offset int64) (key []byte) { + key = make([]byte, 10) + key[0] = freePrefix + key[1] = shard + binary.BigEndian.PutUint64(key[2:10], uint64(offset)) + return key +} diff --git a/storage/fcds/leveldb/leveldb_test.go b/storage/fcds/leveldb/leveldb_test.go new file mode 100644 index 0000000000..81a1b5f952 --- /dev/null +++ b/storage/fcds/leveldb/leveldb_test.go @@ -0,0 +1,43 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. 
+// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . + +package leveldb_test + +import ( + "io/ioutil" + "path/filepath" + "testing" + + "github.com/ethersphere/swarm/storage/fcds" + "github.com/ethersphere/swarm/storage/fcds/leveldb" + "github.com/ethersphere/swarm/storage/fcds/test" +) + +func TestFCDS(t *testing.T) { + test.Test(t, func(t *testing.T) (fcds.Interface, func()) { + path, err := ioutil.TempDir("", "swarm-fcds-") + if err != nil { + t.Fatal(err) + } + + metaStore, err := leveldb.NewMetaStore(filepath.Join(path, "meta")) + if err != nil { + t.Fatal(err) + } + + return test.NewFCDSStore(t, path, metaStore) + }) +} diff --git a/storage/fcds/mem/mem.go b/storage/fcds/mem/mem.go new file mode 100644 index 0000000000..bce9bca857 --- /dev/null +++ b/storage/fcds/mem/mem.go @@ -0,0 +1,112 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . 
+ +package mem + +import ( + "sync" + + "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/storage/fcds" +) + +var _ fcds.MetaStore = new(MetaStore) + +type MetaStore struct { + meta map[string]*fcds.Meta + free map[uint8]map[int64]struct{} + mu sync.RWMutex +} + +func NewMetaStore() (s *MetaStore) { + free := make(map[uint8]map[int64]struct{}) + for shard := uint8(0); shard < 255; shard++ { + free[shard] = make(map[int64]struct{}) + } + return &MetaStore{ + meta: make(map[string]*fcds.Meta), + free: free, + } +} + +func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { + s.mu.RLock() + m = s.meta[string(addr)] + s.mu.RUnlock() + if m == nil { + return nil, chunk.ErrChunkNotFound + } + return m, nil +} + +func (s *MetaStore) Set(addr chunk.Address, shard uint8, reclaimed bool, m *fcds.Meta) (err error) { + s.mu.Lock() + if reclaimed { + delete(s.free[shard], m.Offset) + } + s.meta[string(addr)] = m + s.mu.Unlock() + return nil +} + +func (s *MetaStore) Remove(addr chunk.Address, shard uint8) (err error) { + s.mu.Lock() + defer s.mu.Unlock() + key := string(addr) + m := s.meta[key] + if m == nil { + return chunk.ErrChunkNotFound + } + s.free[shard][m.Offset] = struct{}{} + delete(s.meta, key) + return nil +} + +func (s *MetaStore) FreeOffset(shard uint8) (offset int64, err error) { + s.mu.RLock() + for o := range s.free[shard] { + s.mu.RUnlock() + return o, nil + } + s.mu.RUnlock() + return -1, nil +} + +func (s *MetaStore) Count() (count int, err error) { + s.mu.RLock() + count = len(s.meta) + s.mu.RUnlock() + return count, nil +} + +func (s *MetaStore) Iterate(fn func(chunk.Address, *fcds.Meta) (stop bool, err error)) (err error) { + s.mu.RLock() + defer s.mu.RUnlock() + for a, m := range s.meta { + stop, err := fn(chunk.Address(a), m) + if err != nil { + return err + } + if stop { + return nil + } + } + return nil +} + +func (s *MetaStore) Close() (err error) { + return nil +} diff --git a/storage/fcds/mem/mem_test.go b/storage/fcds/mem/mem_test.go new file mode 100644 index 0000000000..929f8f7778 --- /dev/null +++ b/storage/fcds/mem/mem_test.go @@ -0,0 +1,41 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . 
+ +package mem_test + +import ( + "io/ioutil" + "testing" + + "github.com/ethersphere/swarm/storage/fcds" + "github.com/ethersphere/swarm/storage/fcds/mem" + "github.com/ethersphere/swarm/storage/fcds/test" +) + +func init() { + test.Init() +} + +func TestFCDS(t *testing.T) { + test.Test(t, func(t *testing.T) (fcds.Interface, func()) { + path, err := ioutil.TempDir("", "swarm-fcds-") + if err != nil { + t.Fatal(err) + } + + return test.NewFCDSStore(t, path, mem.NewMetaStore()) + }) +} diff --git a/storage/fcds/meta.go b/storage/fcds/meta.go new file mode 100644 index 0000000000..6ae7f8b1cf --- /dev/null +++ b/storage/fcds/meta.go @@ -0,0 +1,59 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . + +package fcds + +import ( + "encoding/binary" + "fmt" + + "github.com/ethersphere/swarm/chunk" +) + +type MetaStore interface { + Get(addr chunk.Address) (*Meta, error) + Set(addr chunk.Address, shard uint8, reclaimed bool, m *Meta) error + Remove(addr chunk.Address, shard uint8) error + Count() (int, error) + Iterate(func(chunk.Address, *Meta) (stop bool, err error)) error + FreeOffset(shard uint8) (int64, error) + Close() error +} + +type Meta struct { + Size uint16 + Offset int64 +} + +func (m *Meta) MarshalBinary() (data []byte, err error) { + data = make([]byte, 10) + binary.BigEndian.PutUint64(data[:8], uint64(m.Offset)) + binary.BigEndian.PutUint16(data[8:10], m.Size) + return data, nil +} + +func (m *Meta) UnmarshalBinary(data []byte) error { + m.Offset = int64(binary.BigEndian.Uint64(data[:8])) + m.Size = binary.BigEndian.Uint16(data[8:10]) + return nil +} + +func (m *Meta) String() (s string) { + if m == nil { + return "" + } + return fmt.Sprintf("{Size: %v, Offset %v}", m.Size, m.Offset) +} diff --git a/storage/fcds/mock/mock.go b/storage/fcds/mock/mock.go new file mode 100644 index 0000000000..41814bfc48 --- /dev/null +++ b/storage/fcds/mock/mock.go @@ -0,0 +1,113 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . 
+ +package mock + +import ( + "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/storage/fcds" + "github.com/ethersphere/swarm/storage/mock" +) + +var _ fcds.Interface = new(Store) + +type Store struct { + m *mock.NodeStore +} + +func NewStore(m *mock.NodeStore) (s *Store) { + return &Store{ + m: m, + } +} + +func (s *Store) Get(addr chunk.Address) (c chunk.Chunk, err error) { + data, err := s.m.Get(addr) + if err != nil { + if err == mock.ErrNotFound { + return nil, chunk.ErrChunkNotFound + } + return nil, err + } + return chunk.NewChunk(addr, data), nil +} + +func (s *Store) Has(addr chunk.Address) (yes bool, err error) { + _, err = s.m.Get(addr) + if err != nil { + if err == mock.ErrNotFound { + return false, nil + } + return false, err + } + return true, nil +} + +func (s *Store) Put(ch chunk.Chunk) (err error) { + return s.m.Put(ch.Address(), ch.Data()) +} + +func (s *Store) Delete(addr chunk.Address) (err error) { + return s.m.Delete(addr) +} + +func (s *Store) Count() (count int, err error) { + var startKey []byte + for { + keys, err := s.m.Keys(startKey, 0) + if err != nil { + return 0, err + } + count += len(keys.Keys) + if keys.Next == nil { + break + } + startKey = keys.Next + } + return count, nil +} + +func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) { + var startKey []byte + for { + keys, err := s.m.Keys(startKey, 0) + if err != nil { + return err + } + for _, addr := range keys.Keys { + data, err := s.m.Get(addr) + if err != nil { + return err + } + stop, err := fn(chunk.NewChunk(addr, data)) + if err != nil { + return err + } + if stop { + return nil + } + } + if keys.Next == nil { + break + } + startKey = keys.Next + } + return nil +} + +func (s *Store) Close() error { + return nil +} diff --git a/storage/fcds/mock/mock_test.go b/storage/fcds/mock/mock_test.go new file mode 100644 index 0000000000..420b235f39 --- /dev/null +++ b/storage/fcds/mock/mock_test.go @@ -0,0 +1,41 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . + +package mock_test + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethersphere/swarm/storage/fcds" + "github.com/ethersphere/swarm/storage/fcds/mock" + "github.com/ethersphere/swarm/storage/fcds/test" + "github.com/ethersphere/swarm/storage/mock/mem" +) + +func init() { + test.Init() +} + +func TestFCDS(t *testing.T) { + test.Test(t, func(t *testing.T) (fcds.Interface, func()) { + return mock.NewStore( + mem.NewGlobalStore().NewNodeStore( + common.BytesToAddress(make([]byte, 20)), + ), + ), func() {} + }) +} diff --git a/storage/fcds/offsetcache.go b/storage/fcds/offsetcache.go new file mode 100644 index 0000000000..a28525db58 --- /dev/null +++ b/storage/fcds/offsetcache.go @@ -0,0 +1,56 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. 
+// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . + +package fcds + +import "sync" + +type offsetCache struct { + m map[uint8]map[int64]struct{} + mu sync.RWMutex +} + +func newOffsetCache(shardCount uint8) (c *offsetCache) { + m := make(map[uint8]map[int64]struct{}) + for i := uint8(0); i < shardCount; i++ { + m[i] = make(map[int64]struct{}) + } + return &offsetCache{ + m: m, + } +} + +func (c *offsetCache) get(shard uint8) (offset int64) { + c.mu.RLock() + for o := range c.m[shard] { + c.mu.RUnlock() + return o + } + c.mu.RUnlock() + return -1 +} + +func (c *offsetCache) set(shard uint8, offset int64) { + c.mu.Lock() + c.m[shard][offset] = struct{}{} + c.mu.Unlock() +} + +func (c *offsetCache) remove(shard uint8, offset int64) { + c.mu.Lock() + delete(c.m[shard], offset) + c.mu.Unlock() +} diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go new file mode 100644 index 0000000000..8ee8f5188d --- /dev/null +++ b/storage/fcds/test/store.go @@ -0,0 +1,332 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . 
+ +package test + +import ( + "bytes" + "flag" + "fmt" + "io/ioutil" + "math/rand" + "os" + "sync" + "testing" + "time" + + "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/storage/fcds" +) + +var ( + chunksFlag = flag.Int("chunks", 100, "Number of chunks to use in tests.") + concurrencyFlag = flag.Int("concurrency", 8, "Maximal number of parallel operations.") + noCacheFlag = flag.Bool("no-cache", false, "Disable memory cache.") +) + +func Init() { + testing.Init() + flag.Parse() +} + +func Test(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { + + t.Run("empty", func(t *testing.T) { + TestStore(t, &TestStoreOptions{ + ChunkCount: *chunksFlag, + NewStoreFunc: newStoreFunc, + }) + }) + + t.Run("cleaned", func(t *testing.T) { + TestStore(t, &TestStoreOptions{ + ChunkCount: *chunksFlag, + NewStoreFunc: newStoreFunc, + Cleaned: true, + }) + }) + + for _, tc := range []struct { + name string + deleteSplit int + }{ + { + name: "delete-all", + deleteSplit: 1, + }, + { + name: "delete-half", + deleteSplit: 2, + }, + { + name: "delete-fifth", + deleteSplit: 5, + }, + { + name: "delete-tenth", + deleteSplit: 10, + }, + { + name: "delete-percent", + deleteSplit: 100, + }, + { + name: "delete-permill", + deleteSplit: 1000, + }, + } { + t.Run(tc.name, func(t *testing.T) { + TestStore(t, &TestStoreOptions{ + ChunkCount: *chunksFlag, + DeleteSplit: tc.deleteSplit, + NewStoreFunc: newStoreFunc, + }) + }) + } + + t.Run("iterator", func(t *testing.T) { + TestIterator(t, newStoreFunc) + }) +} + +type TestStoreOptions struct { + ChunkCount int + DeleteSplit int + Cleaned bool + NewStoreFunc func(t *testing.T) (fcds.Interface, func()) +} + +func TestStore(t *testing.T, o *TestStoreOptions) { + db, clean := o.NewStoreFunc(t) + defer clean() + + chunks := getChunks(o.ChunkCount) + + if o.Cleaned { + t.Run("clean", func(t *testing.T) { + sem := make(chan struct{}, *concurrencyFlag) + var wg sync.WaitGroup + + wg.Add(o.ChunkCount) + for _, ch := range chunks { + sem <- struct{}{} + + go func(ch chunk.Chunk) { + defer func() { + <-sem + wg.Done() + }() + + if err := db.Put(ch); err != nil { + panic(err) + } + }(ch) + } + wg.Wait() + + wg = sync.WaitGroup{} + + wg.Add(o.ChunkCount) + for _, ch := range chunks { + sem <- struct{}{} + + go func(ch chunk.Chunk) { + defer func() { + <-sem + wg.Done() + }() + + if err := db.Delete(ch.Address()); err != nil { + panic(err) + } + }(ch) + } + wg.Wait() + }) + } + + rand.Shuffle(o.ChunkCount, func(i, j int) { + chunks[i], chunks[j] = chunks[j], chunks[i] + }) + + var deletedChunks sync.Map + + t.Run("write", func(t *testing.T) { + sem := make(chan struct{}, *concurrencyFlag) + var wg sync.WaitGroup + var wantCount int + var wantCountMu sync.Mutex + wg.Add(o.ChunkCount) + for i, ch := range chunks { + sem <- struct{}{} + + go func(i int, ch chunk.Chunk) { + defer func() { + <-sem + wg.Done() + }() + + if err := db.Put(ch); err != nil { + panic(err) + } + if o.DeleteSplit > 0 && i%o.DeleteSplit == 0 { + if err := db.Delete(ch.Address()); err != nil { + panic(err) + } + deletedChunks.Store(string(ch.Address()), nil) + } else { + wantCountMu.Lock() + wantCount++ + wantCountMu.Unlock() + } + }(i, ch) + } + wg.Wait() + }) + + rand.Shuffle(o.ChunkCount, func(i, j int) { + chunks[i], chunks[j] = chunks[j], chunks[i] + }) + + t.Run("read", func(t *testing.T) { + sem := make(chan struct{}, *concurrencyFlag) + var wg sync.WaitGroup + + wg.Add(o.ChunkCount) + for i, ch := range chunks { + sem <- struct{}{} + + go func(i int, ch chunk.Chunk) { 
+ defer func() { + <-sem + wg.Done() + }() + + got, err := db.Get(ch.Address()) + + if _, ok := deletedChunks.Load(string(ch.Address())); ok { + if err != chunk.ErrChunkNotFound { + panic(fmt.Errorf("got error %v, want %v", err, chunk.ErrChunkNotFound)) + } + } else { + if err != nil { + panic(fmt.Errorf("chunk %v %s: %v", i, ch.Address().Hex(), err)) + } + if !bytes.Equal(got.Address(), ch.Address()) { + panic(fmt.Errorf("got chunk %v address %x, want %x", i, got.Address(), ch.Address())) + } + if !bytes.Equal(got.Data(), ch.Data()) { + panic(fmt.Errorf("got chunk %v data %x, want %x", i, got.Data(), ch.Data())) + } + } + }(i, ch) + } + wg.Wait() + }) +} + +func TestIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { + chunkCount := 1000 + + db, clean := newStoreFunc(t) + defer clean() + + chunks := getChunks(chunkCount) + + for _, ch := range chunks { + if err := db.Put(ch); err != nil { + t.Fatal(err) + } + } + + gotCount, err := db.Count() + if err != nil { + t.Fatal(err) + } + if gotCount != chunkCount { + t.Fatalf("got %v count, want %v", gotCount, chunkCount) + } + + var iteratedCount int + if err := db.Iterate(func(ch chunk.Chunk) (stop bool, err error) { + for _, c := range chunks { + if bytes.Equal(c.Address(), ch.Address()) { + if !bytes.Equal(c.Data(), ch.Data()) { + t.Fatalf("invalid data in iterator for key %s", c.Address()) + } + iteratedCount++ + return false, nil + } + } + return false, nil + }); err != nil { + t.Fatal(err) + } + if iteratedCount != chunkCount { + t.Fatalf("iterated on %v chunks, want %v", iteratedCount, chunkCount) + } +} + +func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds.Store, clean func()) { + t.Helper() + + path, err := ioutil.TempDir("", "swarm-fcds") + if err != nil { + t.Fatal(err) + } + + s, err = fcds.NewStore(path, chunk.DefaultSize, metaStore, *noCacheFlag) + if err != nil { + os.RemoveAll(path) + t.Fatal(err) + } + return s, func() { + s.Close() + os.RemoveAll(path) + } +} + +var chunkCache []chunk.Chunk + +func getChunks(count int) []chunk.Chunk { + l := len(chunkCache) + if l == 0 { + chunkCache = make([]chunk.Chunk, count) + for i := 0; i < count; i++ { + chunkCache[i] = GenerateTestRandomChunk() + } + return chunkCache + } + if l < count { + for i := 0; i < count-l; i++ { + chunkCache = append(chunkCache, GenerateTestRandomChunk()) + } + return chunkCache + } + return chunkCache[:count] +} + +func init() { + rand.Seed(time.Now().UnixNano()) +} + +func GenerateTestRandomChunk() chunk.Chunk { + data := make([]byte, chunk.DefaultSize) + rand.Read(data) + key := make([]byte, 32) + rand.Read(key) + return chunk.NewChunk(key, data) +} diff --git a/storage/localstore/export.go b/storage/localstore/export.go index b8826d976e..ccb6287860 100644 --- a/storage/localstore/export.go +++ b/storage/localstore/export.go @@ -27,7 +27,6 @@ import ( "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/log" - "github.com/ethersphere/swarm/shed" ) const ( @@ -58,23 +57,25 @@ func (db *DB) Export(w io.Writer) (count int64, err error) { return 0, err } - err = db.retrievalDataIndex.Iterate(func(item shed.Item) (stop bool, err error) { + err = db.data.Iterate(func(ch chunk.Chunk) (stop bool, err error) { + + data := ch.Data() hdr := &tar.Header{ - Name: hex.EncodeToString(item.Address), + Name: hex.EncodeToString(ch.Address()), Mode: 0644, - Size: int64(len(item.Data)), + Size: int64(len(data)), } if err := tw.WriteHeader(hdr); err != nil { return false, err } - if _, err := 
tw.Write(item.Data); err != nil { + if _, err := tw.Write(data); err != nil { return false, err } count++ return false, nil - }, nil) + }) return count, err } diff --git a/storage/localstore/gc.go b/storage/localstore/gc.go index f8c9c5ef34..8c7379440f 100644 --- a/storage/localstore/gc.go +++ b/storage/localstore/gc.go @@ -21,6 +21,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" + "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/shed" "github.com/syndtr/goleveldb/leveldb" ) @@ -108,6 +109,7 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) { } metrics.GetOrRegisterGauge(metricName+".gcsize", nil).Update(int64(gcSize)) + var addrs []chunk.Address done = true err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) { if gcSize-collectedCount <= target { @@ -118,8 +120,9 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) { metrics.GetOrRegisterGauge(metricName+".accessts", nil).Update(item.AccessTimestamp) // delete from retrieve, pull, gc - db.retrievalDataIndex.DeleteInBatch(batch, item) - db.retrievalAccessIndex.DeleteInBatch(batch, item) + //db.retrievalDataIndex.DeleteInBatch(batch, item) + addrs = append(addrs, item.Address) + db.metaIndex.DeleteInBatch(batch, item) db.pullIndex.DeleteInBatch(batch, item) db.gcIndex.DeleteInBatch(batch, item) collectedCount++ @@ -143,6 +146,12 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) { metrics.GetOrRegisterCounter(metricName+".writebatch.err", nil).Inc(1) return 0, false, err } + for _, a := range addrs { + if err := db.data.Delete(a); err != nil { + metrics.GetOrRegisterCounter(metricName+".deletechunk.err", nil).Inc(1) + return 0, false, err + } + } return collectedCount, done, nil } @@ -162,18 +171,12 @@ func (db *DB) removeChunksInExcludeIndexFromGC() (err error) { var gcSizeChange int64 err = db.gcExcludeIndex.Iterate(func(item shed.Item) (stop bool, err error) { // Get access timestamp - retrievalAccessIndexItem, err := db.retrievalAccessIndex.Get(item) - if err != nil { - return false, err - } - item.AccessTimestamp = retrievalAccessIndexItem.AccessTimestamp - - // Get the binId - retrievalDataIndexItem, err := db.retrievalDataIndex.Get(item) + metaIndexItem, err := db.metaIndex.Get(item) if err != nil { return false, err } - item.BinID = retrievalDataIndexItem.BinID + item.AccessTimestamp = metaIndexItem.AccessTimestamp + item.BinID = metaIndexItem.BinID // Check if this item is in gcIndex and remove it ok, err := db.gcIndex.Has(item) diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 6efb7df840..c68f65fc0f 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -20,6 +20,7 @@ import ( "encoding/binary" "errors" "os" + "path/filepath" "runtime/pprof" "sync" "time" @@ -28,6 +29,9 @@ import ( "github.com/ethereum/go-ethereum/metrics" "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/shed" + "github.com/ethersphere/swarm/storage/fcds" + fcdsleveldb "github.com/ethersphere/swarm/storage/fcds/leveldb" + fcdsmock "github.com/ethersphere/swarm/storage/fcds/mock" "github.com/ethersphere/swarm/storage/mock" ) @@ -61,9 +65,10 @@ type DB struct { // schema name of loaded data schemaName shed.StringField - // retrieval indexes - retrievalDataIndex shed.Index - retrievalAccessIndex shed.Index + // chunk data storage + data fcds.Interface + // bin index and timestamps index + metaIndex shed.Index // push 
syncing index pushIndex shed.Index // push syncing subscriptions triggers @@ -216,61 +221,22 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { if err != nil { return nil, err } - // Functions for retrieval data index. - var ( - encodeValueFunc func(fields shed.Item) (value []byte, err error) - decodeValueFunc func(keyItem shed.Item, value []byte) (e shed.Item, err error) - ) - if o.MockStore != nil { - encodeValueFunc = func(fields shed.Item) (value []byte, err error) { - b := make([]byte, 16) - binary.BigEndian.PutUint64(b[:8], fields.BinID) - binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp)) - err = o.MockStore.Put(fields.Address, fields.Data) - if err != nil { - return nil, err - } - return b, nil - } - decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) { - e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[8:16])) - e.BinID = binary.BigEndian.Uint64(value[:8]) - e.Data, err = o.MockStore.Get(keyItem.Address) - return e, err - } - } else { - encodeValueFunc = func(fields shed.Item) (value []byte, err error) { - b := make([]byte, 16) - binary.BigEndian.PutUint64(b[:8], fields.BinID) - binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp)) - value = append(b, fields.Data...) - return value, nil - } - decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) { - e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[8:16])) - e.BinID = binary.BigEndian.Uint64(value[:8]) - e.Data = value[16:] - return e, nil - } - } - // Index storing actual chunk address, data and bin id. - db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|BinID|Data", shed.IndexFuncs{ - EncodeKey: func(fields shed.Item) (key []byte, err error) { - return fields.Address, nil - }, - DecodeKey: func(key []byte) (e shed.Item, err error) { - e.Address = key - return e, nil - }, - EncodeValue: encodeValueFunc, - DecodeValue: decodeValueFunc, - }) + + metaStore, err := fcdsleveldb.NewMetaStore(filepath.Join(path, "meta")) if err != nil { return nil, err } - // Index storing access timestamp for a particular address. + if o.MockStore == nil { + db.data, err = fcds.NewStore(path, chunk.DefaultSize+8, metaStore, false) + if err != nil { + return nil, err + } + } else { + db.data = fcdsmock.NewStore(o.MockStore) + } + // Index storing bin id, store and access timestamp for a particular address. // It is needed in order to update gc index keys for iteration order. 
- db.retrievalAccessIndex, err = db.shed.NewIndex("Address->AccessTimestamp", shed.IndexFuncs{ + db.metaIndex, err = db.shed.NewIndex("Address->BinID|StoreTimestamp|AccessTimestamp", shed.IndexFuncs{ EncodeKey: func(fields shed.Item) (key []byte, err error) { return fields.Address, nil }, @@ -279,12 +245,16 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { return e, nil }, EncodeValue: func(fields shed.Item) (value []byte, err error) { - b := make([]byte, 8) - binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp)) + b := make([]byte, 24) + binary.BigEndian.PutUint64(b[:8], fields.BinID) + binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp)) + binary.BigEndian.PutUint64(b[16:24], uint64(fields.AccessTimestamp)) return b, nil }, DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) { - e.AccessTimestamp = int64(binary.BigEndian.Uint64(value)) + e.BinID = binary.BigEndian.Uint64(value[:8]) + e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[8:16])) + e.AccessTimestamp = int64(binary.BigEndian.Uint64(value[16:24])) return e, nil }, }) @@ -457,6 +427,9 @@ func (db *DB) Close() (err error) { // TODO: use a logger to write a goroutine profile pprof.Lookup("goroutine").WriteTo(os.Stdout, 2) } + if err := db.data.Close(); err != nil { + log.Error("close chunk data storage", "err", err) + } return db.shed.Close() } @@ -471,13 +444,12 @@ func (db *DB) po(addr chunk.Address) (bin uint8) { func (db *DB) DebugIndices() (indexInfo map[string]int, err error) { indexInfo = make(map[string]int) for k, v := range map[string]shed.Index{ - "retrievalDataIndex": db.retrievalDataIndex, - "retrievalAccessIndex": db.retrievalAccessIndex, - "pushIndex": db.pushIndex, - "pullIndex": db.pullIndex, - "gcIndex": db.gcIndex, - "gcExcludeIndex": db.gcExcludeIndex, - "pinIndex": db.pinIndex, + "metaIndex": db.metaIndex, + "pushIndex": db.pushIndex, + "pullIndex": db.pullIndex, + "gcIndex": db.gcIndex, + "gcExcludeIndex": db.gcExcludeIndex, + "pinIndex": db.pinIndex, } { indexSize, err := v.Count() if err != nil { @@ -491,6 +463,7 @@ func (db *DB) DebugIndices() (indexInfo map[string]int, err error) { } indexInfo["gcSize"] = int(val) + indexInfo["data"], err = db.data.Count() return indexInfo, err } diff --git a/storage/localstore/localstore_test.go b/storage/localstore/localstore_test.go index ace0a10dcb..6fae246df4 100644 --- a/storage/localstore/localstore_test.go +++ b/storage/localstore/localstore_test.go @@ -32,7 +32,6 @@ import ( "github.com/ethersphere/swarm/chunk" chunktesting "github.com/ethersphere/swarm/chunk/testing" "github.com/ethersphere/swarm/shed" - "github.com/syndtr/goleveldb/leveldb" ) func init() { @@ -243,17 +242,15 @@ func newRetrieveIndexesTest(db *DB, chunk chunk.Chunk, storeTimestamp, accessTim return func(t *testing.T) { t.Helper() - item, err := db.retrievalDataIndex.Get(addressToItem(chunk.Address())) + c, err := db.data.Get(chunk.Address()) if err != nil { t.Fatal(err) } - validateItem(t, item, chunk.Address(), chunk.Data(), storeTimestamp, 0) + validateItem(t, shed.Item{Address: c.Address(), Data: c.Data()}, chunk.Address(), chunk.Data(), 0, 0) - // access index should not be set - wantErr := leveldb.ErrNotFound - item, err = db.retrievalAccessIndex.Get(addressToItem(chunk.Address())) - if err != wantErr { - t.Errorf("got error %v, want %v", err, wantErr) + _, err = db.metaIndex.Get(addressToItem(chunk.Address())) + if err != nil { + t.Errorf("got error %v, want %v", err, nil) } } } @@ -264,18 +261,18 @@ func 
newRetrieveIndexesTestWithAccess(db *DB, ch chunk.Chunk, storeTimestamp, ac return func(t *testing.T) { t.Helper() - item, err := db.retrievalDataIndex.Get(addressToItem(ch.Address())) + c, err := db.data.Get(ch.Address()) if err != nil { t.Fatal(err) } - validateItem(t, item, ch.Address(), ch.Data(), storeTimestamp, 0) + validateItem(t, shed.Item{Address: c.Address(), Data: c.Data()}, ch.Address(), ch.Data(), 0, 0) if accessTimestamp > 0 { - item, err = db.retrievalAccessIndex.Get(addressToItem(ch.Address())) + item, err := db.metaIndex.Get(addressToItem(ch.Address())) if err != nil { t.Fatal(err) } - validateItem(t, item, ch.Address(), nil, 0, accessTimestamp) + validateItem(t, item, ch.Address(), nil, storeTimestamp, accessTimestamp) } } } @@ -376,6 +373,20 @@ func newItemsCountTest(i shed.Index, want int) func(t *testing.T) { } } +func newDataCountTest(db *DB, want int) func(t *testing.T) { + return func(t *testing.T) { + t.Helper() + + got, err := db.data.Count() + if err != nil { + t.Fatal(err) + } + if got != want { + t.Fatalf("got %v chunks in data, want %v", got, want) + } + } +} + // newIndexGCSizeTest retruns a test function that validates if DB.gcSize // value is the same as the number of items in DB.gcIndex. func newIndexGCSizeTest(db *DB) func(t *testing.T) { @@ -531,7 +542,7 @@ func TestSetNow(t *testing.T) { } } -func testIndexCounts(t *testing.T, pushIndex, pullIndex, gcIndex, gcExcludeIndex, pinIndex, retrievalDataIndex, retrievalAccessIndex int, indexInfo map[string]int) { +func testIndexCounts(t *testing.T, pushIndex, pullIndex, gcIndex, gcExcludeIndex, pinIndex, data, metaIndex int, indexInfo map[string]int) { t.Helper() if indexInfo["pushIndex"] != pushIndex { t.Fatalf("pushIndex count mismatch. got %d want %d", indexInfo["pushIndex"], pushIndex) @@ -553,12 +564,12 @@ func testIndexCounts(t *testing.T, pushIndex, pullIndex, gcIndex, gcExcludeIndex t.Fatalf("pinIndex count mismatch. got %d want %d", indexInfo["pinIndex"], pinIndex) } - if indexInfo["retrievalDataIndex"] != retrievalDataIndex { - t.Fatalf("retrievalDataIndex count mismatch. got %d want %d", indexInfo["retrievalDataIndex"], retrievalDataIndex) + if indexInfo["data"] != data { + t.Fatalf("data count mismatch. got %d want %d", indexInfo["data"], data) } - if indexInfo["retrievalAccessIndex"] != retrievalAccessIndex { - t.Fatalf("retrievalAccessIndex count mismatch. got %d want %d", indexInfo["retrievalAccessIndex"], retrievalAccessIndex) + if indexInfo["metaIndex"] != metaIndex { + t.Fatalf("metaIndex count mismatch. 
got %d want %d", indexInfo["metaIndex"], metaIndex) } } @@ -585,8 +596,8 @@ func TestDBDebugIndexes(t *testing.T) { t.Fatal(err) } - // for reference: testIndexCounts(t *testing.T, pushIndex, pullIndex, gcIndex, gcExcludeIndex, pinIndex, retrievalDataIndex, retrievalAccessIndex int, indexInfo map[string]int) - testIndexCounts(t, 1, 1, 0, 0, 0, 1, 0, indexCounts) + // for reference: testIndexCounts(t *testing.T, pushIndex, pullIndex, gcIndex, gcExcludeIndex, pinIndex, data, metaIndex int, indexInfo map[string]int) + testIndexCounts(t, 1, 1, 0, 0, 0, 1, 1, indexCounts) // set the chunk for pinning and expect the index count to grow err = db.Set(context.Background(), chunk.ModeSetPin, ch.Address()) @@ -600,7 +611,7 @@ func TestDBDebugIndexes(t *testing.T) { } // assert that there's a pin and gc exclude entry now - testIndexCounts(t, 1, 1, 0, 1, 1, 1, 0, indexCounts) + testIndexCounts(t, 1, 1, 0, 1, 1, 1, 1, indexCounts) // set the chunk as accessed and expect the access index to grow err = db.Set(context.Background(), chunk.ModeSetAccess, ch.Address()) @@ -614,5 +625,4 @@ func TestDBDebugIndexes(t *testing.T) { // assert that there's a pin and gc exclude entry now testIndexCounts(t, 1, 1, 1, 1, 1, 1, 1, indexCounts) - } diff --git a/storage/localstore/mode_get.go b/storage/localstore/mode_get.go index bccebd4dd8..354ebfc4f0 100644 --- a/storage/localstore/mode_get.go +++ b/storage/localstore/mode_get.go @@ -58,19 +58,19 @@ func (db *DB) Get(ctx context.Context, mode chunk.ModeGet, addr chunk.Address) ( // get returns Item from the retrieval index // and updates other indexes. func (db *DB) get(mode chunk.ModeGet, addr chunk.Address) (out shed.Item, err error) { - item := addressToItem(addr) - - out, err = db.retrievalDataIndex.Get(item) + c, err := db.data.Get(addr) if err != nil { return out, err } + out.Address = addr + out.Data = c.Data() switch mode { // update the access timestamp and gc index case chunk.ModeGetRequest: db.updateGCItems(out) case chunk.ModeGetPin: - pinnedItem, err := db.pinIndex.Get(item) + pinnedItem, err := db.pinIndex.Get(addressToItem(addr)) if err != nil { return out, err } @@ -133,9 +133,11 @@ func (db *DB) updateGC(item shed.Item) (err error) { // update accessTimeStamp in retrieve, gc - i, err := db.retrievalAccessIndex.Get(item) + i, err := db.metaIndex.Get(item) switch err { case nil: + item.BinID = i.BinID + item.StoreTimestamp = i.StoreTimestamp item.AccessTimestamp = i.AccessTimestamp case leveldb.ErrNotFound: // no chunk accesses @@ -152,10 +154,9 @@ func (db *DB) updateGC(item shed.Item) (err error) { // update access timestamp item.AccessTimestamp = now() // update retrieve access index - db.retrievalAccessIndex.PutInBatch(batch, item) + db.metaIndex.PutInBatch(batch, item) // add new entry to gc index db.gcIndex.PutInBatch(batch, item) - return db.shed.WriteBatch(batch) } diff --git a/storage/localstore/mode_get_multi.go b/storage/localstore/mode_get_multi.go index 393f8d119e..f95644c376 100644 --- a/storage/localstore/mode_get_multi.go +++ b/storage/localstore/mode_get_multi.go @@ -61,11 +61,16 @@ func (db *DB) GetMulti(ctx context.Context, mode chunk.ModeGet, addrs ...chunk.A // and updates other indexes. 
func (db *DB) getMulti(mode chunk.ModeGet, addrs ...chunk.Address) (out []shed.Item, err error) { out = make([]shed.Item, len(addrs)) - for i, addr := range addrs { - out[i].Address = addr + for i, a := range addrs { + c, err := db.data.Get(a) + if err != nil { + return nil, err + } + out[i].Address = a + out[i].Data = c.Data() } - err = db.retrievalDataIndex.Fill(out) + err = db.metaIndex.Fill(out) if err != nil { return nil, err } diff --git a/storage/localstore/mode_has.go b/storage/localstore/mode_has.go index 0d06b28076..bcbe374e7d 100644 --- a/storage/localstore/mode_has.go +++ b/storage/localstore/mode_has.go @@ -31,7 +31,7 @@ func (db *DB) Has(ctx context.Context, addr chunk.Address) (bool, error) { metrics.GetOrRegisterCounter(metricName, nil).Inc(1) defer totalTimeMetric(metricName, time.Now()) - has, err := db.retrievalDataIndex.Has(addressToItem(addr)) + has, err := db.data.Has(addr) if err != nil { metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1) } @@ -46,9 +46,14 @@ func (db *DB) HasMulti(ctx context.Context, addrs ...chunk.Address) ([]bool, err metrics.GetOrRegisterCounter(metricName, nil).Inc(1) defer totalTimeMetric(metricName, time.Now()) - have, err := db.retrievalDataIndex.HasMulti(addressesToItems(addrs...)...) - if err != nil { - metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1) + have := make([]bool, len(addrs)) + for i, a := range addrs { + has, err := db.data.Has(a) + if err != nil { + metrics.GetOrRegisterCounter(metricName+".error", nil).Inc(1) + return nil, err + } + have[i] = has } - return have, err + return have, nil } diff --git a/storage/localstore/mode_put.go b/storage/localstore/mode_put.go index 33f4c4e525..815ac4f6bd 100644 --- a/storage/localstore/mode_put.go +++ b/storage/localstore/mode_put.go @@ -141,6 +141,12 @@ func (db *DB) put(mode chunk.ModePut, chs ...chunk.Chunk) (exist []bool, err err return nil, err } + for _, ch := range chs { + if err := db.data.Put(ch); err != nil { + return nil, err + } + } + err = db.shed.WriteBatch(batch) if err != nil { return nil, err @@ -161,12 +167,17 @@ func (db *DB) put(mode chunk.ModePut, chs ...chunk.Chunk) (exist []bool, err err // The batch can be written to the database. // Provided batch and binID map are updated. func (db *DB) putRequest(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed.Item) (exists bool, gcSizeChange int64, err error) { - i, err := db.retrievalDataIndex.Get(item) + i, err := db.metaIndex.Get(item) switch err { case nil: exists = true - item.StoreTimestamp = i.StoreTimestamp item.BinID = i.BinID + item.StoreTimestamp = i.StoreTimestamp + item.AccessTimestamp = i.AccessTimestamp + if item.AccessTimestamp > 0 { + db.gcIndex.DeleteInBatch(batch, item) + gcSizeChange-- + } case leveldb.ErrNotFound: // no chunk accesses exists = false @@ -183,12 +194,12 @@ func (db *DB) putRequest(batch *leveldb.Batch, binIDs map[uint8]uint64, item she } } - gcSizeChange, err = db.setGC(batch, item) - if err != nil { - return false, 0, err - } + item.AccessTimestamp = now() - db.retrievalDataIndex.PutInBatch(batch, item) + db.gcIndex.PutInBatch(batch, item) + gcSizeChange++ + + db.metaIndex.PutInBatch(batch, item) return exists, gcSizeChange, nil } @@ -198,7 +209,7 @@ func (db *DB) putRequest(batch *leveldb.Batch, binIDs map[uint8]uint64, item she // The batch can be written to the database. // Provided batch and binID map are updated. 
func (db *DB) putUpload(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed.Item) (exists bool, gcSizeChange int64, err error) { - exists, err = db.retrievalDataIndex.Has(item) + exists, err = db.data.Has(item.Address) if err != nil { return false, 0, err } @@ -226,7 +237,7 @@ func (db *DB) putUpload(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed if err != nil { return false, 0, err } - db.retrievalDataIndex.PutInBatch(batch, item) + db.metaIndex.PutInBatch(batch, item) db.pullIndex.PutInBatch(batch, item) if !anonymous { db.pushIndex.PutInBatch(batch, item) @@ -252,7 +263,7 @@ func (db *DB) putUpload(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed // The batch can be written to the database. // Provided batch and binID map are updated. func (db *DB) putSync(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed.Item) (exists bool, gcSizeChange int64, err error) { - exists, err = db.retrievalDataIndex.Has(item) + exists, err = db.data.Has(item.Address) if err != nil { return false, 0, err } @@ -272,7 +283,7 @@ func (db *DB) putSync(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed.I if err != nil { return false, 0, err } - db.retrievalDataIndex.PutInBatch(batch, item) + db.metaIndex.PutInBatch(batch, item) db.pullIndex.PutInBatch(batch, item) if db.putToGCCheck(item.Address) { @@ -291,31 +302,28 @@ func (db *DB) putSync(batch *leveldb.Batch, binIDs map[uint8]uint64, item shed.I // setGC is a helper function used to add chunks to the retrieval access // index and the gc index in the cases that the putToGCCheck condition -// warrants a gc set. this is to mitigate index leakage in edge cases where +// warrants a gc set. This is to mitigate index leakage in edge cases where // a chunk is added to a node's localstore and given that the chunk is // already within that node's NN (thus, it can be added to the gc index -// safely) +// safely). 
func (db *DB) setGC(batch *leveldb.Batch, item shed.Item) (gcSizeChange int64, err error) { - if item.BinID == 0 { - i, err := db.retrievalDataIndex.Get(item) - if err != nil { - return 0, err - } - item.BinID = i.BinID - } - i, err := db.retrievalAccessIndex.Get(item) + i, err := db.metaIndex.Get(item) switch err { case nil: + item.BinID = i.BinID + item.StoreTimestamp = i.StoreTimestamp item.AccessTimestamp = i.AccessTimestamp - db.gcIndex.DeleteInBatch(batch, item) - gcSizeChange-- + if item.AccessTimestamp > 0 { + db.gcIndex.DeleteInBatch(batch, item) + gcSizeChange-- + } case leveldb.ErrNotFound: // the chunk is not accessed before default: return 0, err } item.AccessTimestamp = now() - db.retrievalAccessIndex.PutInBatch(batch, item) + db.metaIndex.PutInBatch(batch, item) db.gcIndex.PutInBatch(batch, item) gcSizeChange++ diff --git a/storage/localstore/mode_put_test.go b/storage/localstore/mode_put_test.go index ce2277ae4a..2d1da34bd6 100644 --- a/storage/localstore/mode_put_test.go +++ b/storage/localstore/mode_put_test.go @@ -54,7 +54,7 @@ func TestModePutRequest(t *testing.T) { } for _, ch := range chunks { - newRetrieveIndexesTestWithAccess(db, ch, wantTimestamp, wantTimestamp)(t) + newRetrieveIndexesTestWithAccess(db, ch, wantTimestamp, 0)(t) } newItemsCountTest(db.gcIndex, tc.count)(t) @@ -73,7 +73,7 @@ func TestModePutRequest(t *testing.T) { } for _, ch := range chunks { - newRetrieveIndexesTestWithAccess(db, ch, storeTimestamp, wantTimestamp)(t) + newRetrieveIndexesTestWithAccess(db, ch, storeTimestamp, 0)(t) } newItemsCountTest(db.gcIndex, tc.count)(t) @@ -309,7 +309,7 @@ func TestModePut_sameChunk(t *testing.T) { return 0 } - newItemsCountTest(db.retrievalDataIndex, tc.count)(t) + newDataCountTest(db, tc.count)(t) newItemsCountTest(db.pullIndex, count(tcn.pullIndex))(t) newItemsCountTest(db.pushIndex, count(tcn.pushIndex))(t) } @@ -319,13 +319,9 @@ func TestModePut_sameChunk(t *testing.T) { } } -// TestModePutSync_addToGc validates ModePut* with PutSetCheckFunc stub results -// in the added chunk to show up in GC index -func TestModePut_addToGc(t *testing.T) { - retVal := true - // PutSetCheckFunc's output is toggled from the test case - opts := &Options{PutToGCCheck: func(_ []byte) bool { return retVal }} - +// TestModePutSync_addToGC validates ModePut* with PutToGCCheck stub results +// in the added chunk to show up in GC index. 
+func TestModePut_addToGC(t *testing.T) { for _, m := range []struct { mode chunk.ModePut putToGc bool @@ -337,10 +333,10 @@ func TestModePut_addToGc(t *testing.T) { {mode: chunk.ModePutRequest, putToGc: true}, // in ModePutRequest we always insert to GC, so putToGc=false not needed } { for _, tc := range multiChunkTestCases { - t.Run(tc.name, func(t *testing.T) { - retVal = m.putToGc - - db, cleanupFunc := newTestDB(t, opts) + t.Run(fmt.Sprintf("%s %s putToGc=%v", tc.name, m.mode, m.putToGc), func(t *testing.T) { + db, cleanupFunc := newTestDB(t, &Options{ + PutToGCCheck: func(_ []byte) bool { return m.putToGc }, + }) defer cleanupFunc() wantTimestamp := time.Now().UTC().UnixNano() @@ -367,18 +363,19 @@ func TestModePut_addToGc(t *testing.T) { newRetrieveIndexesTestWithAccess(db, ch, wantTimestamp, wantTimestamp) newGCIndexTest(db, ch, wantTimestamp, wantTimestamp, binIDs[po], wantErr)(t) } + + if m.putToGc { + newItemsCountTest(db.gcIndex, tc.count)(t) + newIndexGCSizeTest(db)(t) + } }) } } } -// TestModePutSync_addToGcExisting validates ModePut* with PutSetCheckFunc stub results -// in the added chunk to show up in GC index -func TestModePut_addToGcExisting(t *testing.T) { - retVal := true - // PutSetCheckFunc's output is toggled from the test case - opts := &Options{PutToGCCheck: func(_ []byte) bool { return retVal }} - +// TestModePutSync_addToGCExisting validates ModePut* with PutToGCCheck stub results +// in the added chunk to show up in GC index. +func TestModePut_addToGCExisting(t *testing.T) { for _, m := range []struct { mode chunk.ModePut putToGc bool @@ -390,10 +387,10 @@ func TestModePut_addToGcExisting(t *testing.T) { {mode: chunk.ModePutRequest, putToGc: true}, // in ModePutRequest we always insert to GC, so putToGc=false not needed } { for _, tc := range multiChunkTestCases { - t.Run(tc.name, func(t *testing.T) { - retVal = m.putToGc - - db, cleanupFunc := newTestDB(t, opts) + t.Run(fmt.Sprintf("%s %s putToGc=%v", tc.name, m.mode, m.putToGc), func(t *testing.T) { + db, cleanupFunc := newTestDB(t, &Options{ + PutToGCCheck: func(_ []byte) bool { return m.putToGc }, + }) defer cleanupFunc() wantStoreTimestamp := time.Now().UTC().UnixNano() @@ -434,6 +431,11 @@ func TestModePut_addToGcExisting(t *testing.T) { newRetrieveIndexesTestWithAccess(db, ch, wantStoreTimestamp, wantAccessTimestamp) newGCIndexTest(db, ch, wantStoreTimestamp, wantAccessTimestamp, binIDs[po], wantErr)(t) } + + if m.putToGc { + newItemsCountTest(db.gcIndex, tc.count)(t) + newIndexGCSizeTest(db)(t) + } }) } } @@ -464,7 +466,7 @@ func TestPutDuplicateChunks(t *testing.T) { t.Error("second chunk should exist") } - newItemsCountTest(db.retrievalDataIndex, 1)(t) + newDataCountTest(db, 1)(t) got, err := db.Get(context.Background(), chunk.ModeGetLookup, ch.Address()) if err != nil { diff --git a/storage/localstore/mode_set.go b/storage/localstore/mode_set.go index 0d5cf229f1..950b01bd4e 100644 --- a/storage/localstore/mode_set.go +++ b/storage/localstore/mode_set.go @@ -54,6 +54,7 @@ func (db *DB) set(mode chunk.ModeSet, addrs ...chunk.Address) (err error) { defer db.batchMu.Unlock() batch := new(leveldb.Batch) + var removeChunks bool // variables that provide information for operations // to be done after write batch function successfully executes @@ -96,6 +97,7 @@ func (db *DB) set(mode chunk.ModeSet, addrs ...chunk.Address) (err error) { } gcSizeChange += c } + removeChunks = true case chunk.ModeSetPin: for _, addr := range addrs { @@ -125,6 +127,15 @@ func (db *DB) set(mode chunk.ModeSet, addrs 
...chunk.Address) (err error) { if err != nil { return err } + + if removeChunks { + for _, a := range addrs { + if err := db.data.Delete(a); err != nil { + return err + } + } + } + for po := range triggerPullFeed { db.triggerPullSubscriptions(po) } @@ -138,14 +149,14 @@ func (db *DB) setAccess(batch *leveldb.Batch, binIDs map[uint8]uint64, addr chun item := addressToItem(addr) - // need to get access timestamp here as it is not - // provided by the access function, and it is not - // a property of a chunk provided to Accessor.Put. - i, err := db.retrievalDataIndex.Get(item) + i, err := db.metaIndex.Get(item) switch err { case nil: - item.StoreTimestamp = i.StoreTimestamp item.BinID = i.BinID + item.StoreTimestamp = i.StoreTimestamp + item.AccessTimestamp = i.AccessTimestamp + db.gcIndex.DeleteInBatch(batch, item) + gcSizeChange-- case leveldb.ErrNotFound: db.pushIndex.DeleteInBatch(batch, item) item.StoreTimestamp = now() @@ -156,20 +167,7 @@ func (db *DB) setAccess(batch *leveldb.Batch, binIDs map[uint8]uint64, addr chun default: return 0, err } - - i, err = db.retrievalAccessIndex.Get(item) - switch err { - case nil: - item.AccessTimestamp = i.AccessTimestamp - db.gcIndex.DeleteInBatch(batch, item) - gcSizeChange-- - case leveldb.ErrNotFound: - // the chunk is not accessed before - default: - return 0, err - } item.AccessTimestamp = now() - db.retrievalAccessIndex.PutInBatch(batch, item) db.pullIndex.PutInBatch(batch, item) db.gcIndex.PutInBatch(batch, item) gcSizeChange++ @@ -191,20 +189,16 @@ func (db *DB) setSync(batch *leveldb.Batch, addr chunk.Address, mode chunk.ModeS // provided by the access function, and it is not // a property of a chunk provided to Accessor.Put. - i, err := db.retrievalDataIndex.Get(item) + i, err := db.metaIndex.Get(item) if err != nil { if err == leveldb.ErrNotFound { - // chunk is not found, - // no need to update gc index - // just delete from the push index - // if it is there - db.pushIndex.DeleteInBatch(batch, item) return 0, nil } return 0, err } - item.StoreTimestamp = i.StoreTimestamp item.BinID = i.BinID + item.StoreTimestamp = i.StoreTimestamp + item.AccessTimestamp = i.AccessTimestamp switch mode { case chunk.ModeSetSyncPull: @@ -276,19 +270,12 @@ func (db *DB) setSync(batch *leveldb.Batch, addr chunk.Address, mode chunk.ModeS db.pushIndex.DeleteInBatch(batch, item) } - i, err = db.retrievalAccessIndex.Get(item) - switch err { - case nil: - item.AccessTimestamp = i.AccessTimestamp + if item.AccessTimestamp > 0 { db.gcIndex.DeleteInBatch(batch, item) gcSizeChange-- - case leveldb.ErrNotFound: - // the chunk is not accessed before - default: - return 0, err } item.AccessTimestamp = now() - db.retrievalAccessIndex.PutInBatch(batch, item) + db.metaIndex.PutInBatch(batch, item) // Add in gcIndex only if this chunk is not pinned ok, err := db.pinIndex.Has(item) @@ -312,23 +299,18 @@ func (db *DB) setRemove(batch *leveldb.Batch, addr chunk.Address) (gcSizeChange // need to get access timestamp here as it is not // provided by the access function, and it is not // a property of a chunk provided to Accessor.Put. 
- i, err := db.retrievalAccessIndex.Get(item) + i, err := db.metaIndex.Get(item) switch err { case nil: + item.BinID = i.BinID + item.StoreTimestamp = i.StoreTimestamp item.AccessTimestamp = i.AccessTimestamp case leveldb.ErrNotFound: default: return 0, err } - i, err = db.retrievalDataIndex.Get(item) - if err != nil { - return 0, err - } - item.StoreTimestamp = i.StoreTimestamp - item.BinID = i.BinID - db.retrievalDataIndex.DeleteInBatch(batch, item) - db.retrievalAccessIndex.DeleteInBatch(batch, item) + db.metaIndex.DeleteInBatch(batch, item) db.pullIndex.DeleteInBatch(batch, item) db.gcIndex.DeleteInBatch(batch, item) // a check is needed for decrementing gcSize diff --git a/storage/localstore/mode_set_test.go b/storage/localstore/mode_set_test.go index abe84cba73..606a2acda3 100644 --- a/storage/localstore/mode_set_test.go +++ b/storage/localstore/mode_set_test.go @@ -333,22 +333,23 @@ func TestModeSetRemove(t *testing.T) { t.Run("retrieve indexes", func(t *testing.T) { for _, ch := range chunks { - wantErr := leveldb.ErrNotFound - _, err := db.retrievalDataIndex.Get(addressToItem(ch.Address())) + wantErr := chunk.ErrChunkNotFound + _, err := db.data.Get(ch.Address()) if err != wantErr { t.Errorf("got error %v, want %v", err, wantErr) } // access index should not be set - _, err = db.retrievalAccessIndex.Get(addressToItem(ch.Address())) + _, err = db.metaIndex.Get(addressToItem(ch.Address())) + wantErr = leveldb.ErrNotFound if err != wantErr { t.Errorf("got error %v, want %v", err, wantErr) } } - t.Run("retrieve data index count", newItemsCountTest(db.retrievalDataIndex, 0)) + t.Run("retrieve data index count", newDataCountTest(db, 0)) - t.Run("retrieve access index count", newItemsCountTest(db.retrievalAccessIndex, 0)) + t.Run("retrieve access index count", newItemsCountTest(db.metaIndex, 0)) }) for _, ch := range chunks { diff --git a/storage/localstore/subscription_pull_test.go b/storage/localstore/subscription_pull_test.go index 993fefe77d..cf679aee7b 100644 --- a/storage/localstore/subscription_pull_test.go +++ b/storage/localstore/subscription_pull_test.go @@ -569,7 +569,7 @@ func readPullSubscriptionBin(ctx context.Context, db *DB, bin uint8, ch <-chan c if !bytes.Equal(got.Address, addr) { err = fmt.Errorf("got chunk bin id %v in bin %v %v, want %v", i, bin, got.Address.Hex(), addr.Hex()) } else { - want, err := db.retrievalDataIndex.Get(shed.Item{ + want, err := db.metaIndex.Get(shed.Item{ Address: addr, }) if err != nil { diff --git a/storage/localstore/subscription_push.go b/storage/localstore/subscription_push.go index 1df1fabc9f..d5fe42eaa1 100644 --- a/storage/localstore/subscription_push.go +++ b/storage/localstore/subscription_push.go @@ -72,13 +72,13 @@ func (db *DB) SubscribePush(ctx context.Context) (c <-chan chunk.Chunk, stop fun var count int err := db.pushIndex.Iterate(func(item shed.Item) (stop bool, err error) { // get chunk data - dataItem, err := db.retrievalDataIndex.Get(item) + c, err := db.data.Get(item.Address) if err != nil { return true, err } select { - case chunks <- chunk.NewChunk(dataItem.Address, dataItem.Data).WithTagID(item.Tag): + case chunks <- chunk.NewChunk(c.Address(), c.Data()).WithTagID(item.Tag): count++ // set next iteration start item // when its chunk is successfully sent to channel From 2506b88acac8a7e85e93953fa073b9e8b9b263c6 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Fri, 6 Dec 2019 12:57:57 +0100 Subject: [PATCH 02/23] storage/{fcds,localstore}: add comments and minor adjustments --- storage/fcds/fcds.go | 66 
++++++++++++++++++++-------- storage/fcds/leveldb/leveldb.go | 41 +++++++++++------ storage/fcds/leveldb/leveldb_test.go | 4 +- storage/fcds/mem/mem.go | 15 +++++++ storage/fcds/mem/mem_test.go | 8 ++-- storage/fcds/meta.go | 5 +++ storage/fcds/mock/mock.go | 12 +++++ storage/fcds/mock/mock_test.go | 7 +-- storage/fcds/offsetcache.go | 8 ++++ storage/fcds/test/store.go | 55 +++++++++++------------ storage/localstore/localstore.go | 16 ++++--- 11 files changed, 162 insertions(+), 75 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index f960506498..fe23efbca7 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -28,10 +28,9 @@ import ( "github.com/ethersphere/swarm/chunk" ) -const shardCount = 32 - -var ErrDBClosed = errors.New("closed database") - +// Interface specifies methods required for FCDS implementation. +// It can be used where alternative implementations are needed to +// switch at runtime. type Interface interface { Get(addr chunk.Address) (ch chunk.Chunk, err error) Has(addr chunk.Address) (yes bool, err error) @@ -44,20 +43,33 @@ type Interface interface { var _ Interface = new(Store) +// Number of files that store chunk data. +const shardCount = 32 + +// ErrDBClosed is returned if database is already closed. +var ErrDBClosed = errors.New("closed database") + +// Store is the main FCDS implementation. It stores chunk data into +// a number of files partitioned by the last byte of the chunk address. type Store struct { - shards map[uint8]*os.File - shardsMu map[uint8]*sync.Mutex - meta MetaStore - free map[uint8]struct{} - freeMu sync.RWMutex - freeCache *offsetCache - wg sync.WaitGroup - maxChunkSize int - quit chan struct{} - quitOnce sync.Once + shards map[uint8]*os.File // relations with shard id and a shard file + shardsMu map[uint8]*sync.Mutex // mutex for every shard file + meta MetaStore // stores chunk offsets + free map[uint8]struct{} // which shards have free offsets + freeMu sync.RWMutex // protects free field + freeCache *offsetCache // optional cache of free offset values + wg sync.WaitGroup // blocks Close until all other method calls are done + maxChunkSize int // maximal chunk data size + quit chan struct{} // quit disables all operations after Close is called + quitOnce sync.Once // protects close channel from multiple Close calls } -func NewStore(path string, maxChunkSize int, metaStore MetaStore, noCache bool) (s *Store, err error) { +// NewStore constructs a new Store with files at path, with specified max chunk size. +// Argument withCache enables in memory cache of free chunk data positions in files. +func NewStore(path string, maxChunkSize int, metaStore MetaStore, withCache bool) (s *Store, err error) { + if err := os.MkdirAll(path, 0777); err != nil { + return nil, err + } shards := make(map[byte]*os.File, shardCount) shardsMu := make(map[uint8]*sync.Mutex) for i := byte(0); i < shardCount; i++ { @@ -67,10 +79,8 @@ func NewStore(path string, maxChunkSize int, metaStore MetaStore, noCache bool) } shardsMu[i] = new(sync.Mutex) } - var ( - freeCache *offsetCache - ) - if !noCache { + var freeCache *offsetCache + if withCache { freeCache = newOffsetCache(shardCount) } return &Store{ @@ -84,6 +94,7 @@ func NewStore(path string, maxChunkSize int, metaStore MetaStore, noCache bool) }, nil } +// Get returns a chunk with data. 
func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { done, err := s.protect() if err != nil { @@ -110,6 +121,7 @@ func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { return chunk.NewChunk(addr, data), nil } +// Has returns true if chunk is stored. func (s *Store) Has(addr chunk.Address) (yes bool, err error) { done, err := s.protect() if err != nil { @@ -131,6 +143,7 @@ func (s *Store) Has(addr chunk.Address) (yes bool, err error) { return true, nil } +// Put stores chunk data. func (s *Store) Put(ch chunk.Chunk) (err error) { done, err := s.protect() if err != nil { @@ -207,6 +220,7 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { }) } +// Delete removes chunk data. func (s *Store) Delete(addr chunk.Address) (err error) { done, err := s.protect() if err != nil { @@ -233,10 +247,12 @@ func (s *Store) Delete(addr chunk.Address) (err error) { return s.meta.Remove(addr, shard) } +// Count returns a number of stored chunks. func (s *Store) Count() (count int, err error) { return s.meta.Count() } +// Iterate iterates over stored chunks in no particular order. func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) { done, err := s.protect() if err != nil { @@ -263,6 +279,10 @@ func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) }) } +// Close disables of further operations on the Store. +// Every call to its methods will return ErrDBClosed error. +// Close will wait for all running operations to finish before +// closing its MetaStore and returning. func (s *Store) Close() (err error) { s.quitOnce.Do(func() { close(s.quit) @@ -286,9 +306,15 @@ func (s *Store) Close() (err error) { return s.meta.Close() } +// protect protects Store from executing operations +// after the Close method is called and makes sure +// that Close method will wait for all ongoing operations +// to finish before returning. Returned function done +// must be closed to unblock the Close method call. func (s *Store) protect() (done func(), err error) { select { case <-s.quit: + // Store is closed. return nil, ErrDBClosed default: } @@ -296,10 +322,12 @@ func (s *Store) protect() (done func(), err error) { return s.wg.Done, nil } +// getMeta returns Meta information from MetaStore. func (s *Store) getMeta(addr chunk.Address) (m *Meta, err error) { return s.meta.Get(addr) } +// getShard returns a shard number for the chunk address. func getShard(addr chunk.Address) (shard uint8) { return addr[len(addr)-1] % shardCount } diff --git a/storage/fcds/leveldb/leveldb.go b/storage/fcds/leveldb/leveldb.go index 637eac5715..1c54a7eb0c 100644 --- a/storage/fcds/leveldb/leveldb.go +++ b/storage/fcds/leveldb/leveldb.go @@ -27,12 +27,15 @@ import ( var _ fcds.MetaStore = new(MetaStore) +// MetaStore implements FCDS MetaStore with LevelDB +// for persistence. type MetaStore struct { db *leveldb.DB } -func NewMetaStore(filename string) (s *MetaStore, err error) { - db, err := leveldb.OpenFile(filename, &opt.Options{}) +// NewMetaStore returns new MetaStore at path. +func NewMetaStore(path string) (s *MetaStore, err error) { + db, err := leveldb.OpenFile(path, &opt.Options{}) if err != nil { return nil, err } @@ -41,6 +44,7 @@ func NewMetaStore(filename string) (s *MetaStore, err error) { }, err } +// Get returns chunk meta information. 
func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { data, err := s.db.Get(chunkKey(addr), nil) if err != nil { @@ -56,6 +60,9 @@ func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { return m, nil } +// Set adds a new chunk meta information for a shard. +// Reclaimed flag denotes that the chunk is at the place of +// already deleted chunk, not appended to the end of the file. func (s *MetaStore) Set(addr chunk.Address, shard uint8, reclaimed bool, m *fcds.Meta) (err error) { batch := new(leveldb.Batch) if reclaimed { @@ -69,6 +76,21 @@ func (s *MetaStore) Set(addr chunk.Address, shard uint8, reclaimed bool, m *fcds return s.db.Write(batch, nil) } +// Remove removes chunk meta information from the shard. +func (s *MetaStore) Remove(addr chunk.Address, shard uint8) (err error) { + m, err := s.Get(addr) + if err != nil { + return err + } + batch := new(leveldb.Batch) + batch.Put(freeKey(shard, m.Offset), nil) + batch.Delete(chunkKey(addr)) + return s.db.Write(batch, nil) +} + +// FreeOffset returns an offset that can be reclaimed by +// another chunk. If the returned value is less then 0 +// there are no free offset at this shard. func (s *MetaStore) FreeOffset(shard uint8) (offset int64, err error) { i := s.db.NewIterator(nil, nil) defer i.Release() @@ -82,17 +104,8 @@ func (s *MetaStore) FreeOffset(shard uint8) (offset int64, err error) { return offset, nil } -func (s *MetaStore) Remove(addr chunk.Address, shard uint8) (err error) { - m, err := s.Get(addr) - if err != nil { - return err - } - batch := new(leveldb.Batch) - batch.Put(freeKey(shard, m.Offset), nil) - batch.Delete(chunkKey(addr)) - return s.db.Write(batch, nil) -} - +// Count returns a number of chunks in MetaStore. +// This operation is slow for larger numbers of chunks. func (s *MetaStore) Count() (count int, err error) { it := s.db.NewIterator(nil, nil) defer it.Release() @@ -111,6 +124,7 @@ func (s *MetaStore) Count() (count int, err error) { return count, it.Error() } +// Iterate iterates over all chunk meta information. func (s *MetaStore) Iterate(fn func(chunk.Address, *fcds.Meta) (stop bool, err error)) (err error) { it := s.db.NewIterator(nil, nil) defer it.Release() @@ -139,6 +153,7 @@ func (s *MetaStore) Iterate(fn func(chunk.Address, *fcds.Meta) (stop bool, err e return it.Error() } +// Close closes the underlaying LevelDB instance. func (s *MetaStore) Close() (err error) { return s.db.Close() } diff --git a/storage/fcds/leveldb/leveldb_test.go b/storage/fcds/leveldb/leveldb_test.go index 81a1b5f952..25aa66e637 100644 --- a/storage/fcds/leveldb/leveldb_test.go +++ b/storage/fcds/leveldb/leveldb_test.go @@ -26,8 +26,10 @@ import ( "github.com/ethersphere/swarm/storage/fcds/test" ) +// TestFCDS runs a standard series of tests on main Store implementation +// with LevelDB meta store. func TestFCDS(t *testing.T) { - test.Test(t, func(t *testing.T) (fcds.Interface, func()) { + test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { path, err := ioutil.TempDir("", "swarm-fcds-") if err != nil { t.Fatal(err) diff --git a/storage/fcds/mem/mem.go b/storage/fcds/mem/mem.go index bce9bca857..4d4c5d1750 100644 --- a/storage/fcds/mem/mem.go +++ b/storage/fcds/mem/mem.go @@ -25,12 +25,15 @@ import ( var _ fcds.MetaStore = new(MetaStore) +// MetaStore is the simplest in-memory implementation of FCDS MetaStore. +// It is meant to be used as the reference implementation. 
type MetaStore struct { meta map[string]*fcds.Meta free map[uint8]map[int64]struct{} mu sync.RWMutex } +// NewMetaStore constructs a new MetaStore. func NewMetaStore() (s *MetaStore) { free := make(map[uint8]map[int64]struct{}) for shard := uint8(0); shard < 255; shard++ { @@ -42,6 +45,7 @@ func NewMetaStore() (s *MetaStore) { } } +// Get returns chunk meta information. func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { s.mu.RLock() m = s.meta[string(addr)] @@ -52,6 +56,9 @@ func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { return m, nil } +// Set adds a new chunk meta information for a shard. +// Reclaimed flag denotes that the chunk is at the place of +// already deleted chunk, not appended to the end of the file. func (s *MetaStore) Set(addr chunk.Address, shard uint8, reclaimed bool, m *fcds.Meta) (err error) { s.mu.Lock() if reclaimed { @@ -62,6 +69,7 @@ func (s *MetaStore) Set(addr chunk.Address, shard uint8, reclaimed bool, m *fcds return nil } +// Remove removes chunk meta information from the shard. func (s *MetaStore) Remove(addr chunk.Address, shard uint8) (err error) { s.mu.Lock() defer s.mu.Unlock() @@ -75,6 +83,9 @@ func (s *MetaStore) Remove(addr chunk.Address, shard uint8) (err error) { return nil } +// FreeOffset returns an offset that can be reclaimed by +// another chunk. If the returned value is less then 0 +// there are no free offset at this shard. func (s *MetaStore) FreeOffset(shard uint8) (offset int64, err error) { s.mu.RLock() for o := range s.free[shard] { @@ -85,6 +96,7 @@ func (s *MetaStore) FreeOffset(shard uint8) (offset int64, err error) { return -1, nil } +// Count returns a number of chunks in MetaStore. func (s *MetaStore) Count() (count int, err error) { s.mu.RLock() count = len(s.meta) @@ -92,6 +104,7 @@ func (s *MetaStore) Count() (count int, err error) { return count, nil } +// Iterate iterates over all chunk meta information. func (s *MetaStore) Iterate(fn func(chunk.Address, *fcds.Meta) (stop bool, err error)) (err error) { s.mu.RLock() defer s.mu.RUnlock() @@ -107,6 +120,8 @@ func (s *MetaStore) Iterate(fn func(chunk.Address, *fcds.Meta) (stop bool, err e return nil } +// Close doesn't do anything. +// It exists to implement fcdb.MetaStore interface. func (s *MetaStore) Close() (err error) { return nil } diff --git a/storage/fcds/mem/mem_test.go b/storage/fcds/mem/mem_test.go index 929f8f7778..56e372ac35 100644 --- a/storage/fcds/mem/mem_test.go +++ b/storage/fcds/mem/mem_test.go @@ -25,12 +25,10 @@ import ( "github.com/ethersphere/swarm/storage/fcds/test" ) -func init() { - test.Init() -} - +// TestFCDS runs a standard series of tests on main Store implementation +// with in-memory meta store. func TestFCDS(t *testing.T) { - test.Test(t, func(t *testing.T) (fcds.Interface, func()) { + test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { path, err := ioutil.TempDir("", "swarm-fcds-") if err != nil { t.Fatal(err) diff --git a/storage/fcds/meta.go b/storage/fcds/meta.go index 6ae7f8b1cf..68fb95e225 100644 --- a/storage/fcds/meta.go +++ b/storage/fcds/meta.go @@ -23,6 +23,8 @@ import ( "github.com/ethersphere/swarm/chunk" ) +// MetaStore defines methods to store and manage +// chunk meta information in Store FCDS implementation. type MetaStore interface { Get(addr chunk.Address) (*Meta, error) Set(addr chunk.Address, shard uint8, reclaimed bool, m *Meta) error @@ -33,11 +35,13 @@ type MetaStore interface { Close() error } +// Meta stores chunk data size and its offset in a file. 
type Meta struct { Size uint16 Offset int64 } +// MarshalBinary returns binary encoded value of meta chunk information. func (m *Meta) MarshalBinary() (data []byte, err error) { data = make([]byte, 10) binary.BigEndian.PutUint64(data[:8], uint64(m.Offset)) @@ -45,6 +49,7 @@ func (m *Meta) MarshalBinary() (data []byte, err error) { return data, nil } +// UnmarshalBinary sets meta chunk information from encoded data. func (m *Meta) UnmarshalBinary(data []byte) error { m.Offset = int64(binary.BigEndian.Uint64(data[:8])) m.Size = binary.BigEndian.Uint16(data[8:10]) diff --git a/storage/fcds/mock/mock.go b/storage/fcds/mock/mock.go index 41814bfc48..d37c479169 100644 --- a/storage/fcds/mock/mock.go +++ b/storage/fcds/mock/mock.go @@ -24,16 +24,21 @@ import ( var _ fcds.Interface = new(Store) +// Store implements FCDS Interface by using mock +// store for persistence. type Store struct { m *mock.NodeStore } +// NewStore returns a new store with mock NodeStore +// for storing Chunk data. func NewStore(m *mock.NodeStore) (s *Store) { return &Store{ m: m, } } +// Get returns a chunk with data. func (s *Store) Get(addr chunk.Address) (c chunk.Chunk, err error) { data, err := s.m.Get(addr) if err != nil { @@ -45,6 +50,7 @@ func (s *Store) Get(addr chunk.Address) (c chunk.Chunk, err error) { return chunk.NewChunk(addr, data), nil } +// Has returns true if chunk is stored. func (s *Store) Has(addr chunk.Address) (yes bool, err error) { _, err = s.m.Get(addr) if err != nil { @@ -56,14 +62,17 @@ func (s *Store) Has(addr chunk.Address) (yes bool, err error) { return true, nil } +// Put stores chunk data. func (s *Store) Put(ch chunk.Chunk) (err error) { return s.m.Put(ch.Address(), ch.Data()) } +// Delete removes chunk data. func (s *Store) Delete(addr chunk.Address) (err error) { return s.m.Delete(addr) } +// Count returns a number of stored chunks. func (s *Store) Count() (count int, err error) { var startKey []byte for { @@ -80,6 +89,7 @@ func (s *Store) Count() (count int, err error) { return count, nil } +// Iterate iterates over stored chunks in no particular order. func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) { var startKey []byte for { @@ -108,6 +118,8 @@ func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) return nil } +// Close doesn't do anything. +// It exists to implement fcdb.MetaStore interface. func (s *Store) Close() error { return nil } diff --git a/storage/fcds/mock/mock_test.go b/storage/fcds/mock/mock_test.go index 420b235f39..56bc02701e 100644 --- a/storage/fcds/mock/mock_test.go +++ b/storage/fcds/mock/mock_test.go @@ -26,12 +26,9 @@ import ( "github.com/ethersphere/swarm/storage/mock/mem" ) -func init() { - test.Init() -} - +// TestFCDS runs a standard series of tests on mock Store implementation. func TestFCDS(t *testing.T) { - test.Test(t, func(t *testing.T) (fcds.Interface, func()) { + test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { return mock.NewStore( mem.NewGlobalStore().NewNodeStore( common.BytesToAddress(make([]byte, 20)), diff --git a/storage/fcds/offsetcache.go b/storage/fcds/offsetcache.go index a28525db58..66311fdbc1 100644 --- a/storage/fcds/offsetcache.go +++ b/storage/fcds/offsetcache.go @@ -18,11 +18,14 @@ package fcds import "sync" +// offsetCache is a simple cache of offset integers +// by shard files. type offsetCache struct { m map[uint8]map[int64]struct{} mu sync.RWMutex } +// newOffsetCache constructs offsetCache for a fixed number of shards. 
func newOffsetCache(shardCount uint8) (c *offsetCache) { m := make(map[uint8]map[int64]struct{}) for i := uint8(0); i < shardCount; i++ { @@ -33,6 +36,9 @@ func newOffsetCache(shardCount uint8) (c *offsetCache) { } } +// get returns a free offset in a shard. If the returned +// value is less then 0, there are no free offset in that +// shard. func (c *offsetCache) get(shard uint8) (offset int64) { c.mu.RLock() for o := range c.m[shard] { @@ -43,12 +49,14 @@ func (c *offsetCache) get(shard uint8) (offset int64) { return -1 } +// set sets a free offset for a shard file. func (c *offsetCache) set(shard uint8, offset int64) { c.mu.Lock() c.m[shard][offset] = struct{}{} c.mu.Unlock() } +// remove removes a free offset for a shard file. func (c *offsetCache) remove(shard uint8, offset int64) { c.mu.Lock() delete(c.m[shard], offset) diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index 8ee8f5188d..1f663bf6aa 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -25,9 +25,9 @@ import ( "os" "sync" "testing" - "time" "github.com/ethersphere/swarm/chunk" + chunktesting "github.com/ethersphere/swarm/chunk/testing" "github.com/ethersphere/swarm/storage/fcds" ) @@ -37,22 +37,24 @@ var ( noCacheFlag = flag.Bool("no-cache", false, "Disable memory cache.") ) -func Init() { - testing.Init() +// Main parses custom cli flags automatically on test runs. +func Main(m *testing.M) { flag.Parse() + os.Exit(m.Run()) } -func Test(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { +// RunAll runs all available tests for a Store implementation. +func RunAll(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { t.Run("empty", func(t *testing.T) { - TestStore(t, &TestStoreOptions{ + RunStore(t, &RunStoreOptions{ ChunkCount: *chunksFlag, NewStoreFunc: newStoreFunc, }) }) t.Run("cleaned", func(t *testing.T) { - TestStore(t, &TestStoreOptions{ + RunStore(t, &RunStoreOptions{ ChunkCount: *chunksFlag, NewStoreFunc: newStoreFunc, Cleaned: true, @@ -89,7 +91,7 @@ func Test(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func()) }, } { t.Run(tc.name, func(t *testing.T) { - TestStore(t, &TestStoreOptions{ + RunStore(t, &RunStoreOptions{ ChunkCount: *chunksFlag, DeleteSplit: tc.deleteSplit, NewStoreFunc: newStoreFunc, @@ -98,18 +100,22 @@ func Test(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func()) } t.Run("iterator", func(t *testing.T) { - TestIterator(t, newStoreFunc) + RunIterator(t, newStoreFunc) }) } -type TestStoreOptions struct { +// RunStoreOptions define parameters for Store test function. +type RunStoreOptions struct { + NewStoreFunc func(t *testing.T) (fcds.Interface, func()) ChunkCount int DeleteSplit int Cleaned bool - NewStoreFunc func(t *testing.T) (fcds.Interface, func()) } -func TestStore(t *testing.T, o *TestStoreOptions) { +// RunStore tests a single Store implementation for its general functionalities. +// Subtests are deliberately separated into sections that can have timings +// printed on test runs for each of them. +func RunStore(t *testing.T, o *RunStoreOptions) { db, clean := o.NewStoreFunc(t) defer clean() @@ -238,7 +244,8 @@ func TestStore(t *testing.T, o *TestStoreOptions) { }) } -func TestIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { +// RunIterator validates behaviour of Iterate and Count methods on a Store. 
+func RunIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { chunkCount := 1000 db, clean := newStoreFunc(t) @@ -280,6 +287,8 @@ func TestIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, } } +// NewFCDSStore is a test helper function that constructs +// a new Store for testing purposes into which a specific MetaStore can be injected. func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds.Store, clean func()) { t.Helper() @@ -288,7 +297,7 @@ func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds. t.Fatal(err) } - s, err = fcds.NewStore(path, chunk.DefaultSize, metaStore, *noCacheFlag) + s, err = fcds.NewStore(path, chunk.DefaultSize, metaStore, !*noCacheFlag) if err != nil { os.RemoveAll(path) t.Fatal(err) @@ -299,34 +308,26 @@ func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds. } } +// chunkCache reduces the work done by generating random chunks +// by getChunks function by keeping storing them for future reuse. var chunkCache []chunk.Chunk +// getChunk returns a number of chunks with random data for testing purposes. +// By calling it multiple times, it will return same chunks from the cache. func getChunks(count int) []chunk.Chunk { l := len(chunkCache) if l == 0 { chunkCache = make([]chunk.Chunk, count) for i := 0; i < count; i++ { - chunkCache[i] = GenerateTestRandomChunk() + chunkCache[i] = chunktesting.GenerateTestRandomChunk() } return chunkCache } if l < count { for i := 0; i < count-l; i++ { - chunkCache = append(chunkCache, GenerateTestRandomChunk()) + chunkCache = append(chunkCache, chunktesting.GenerateTestRandomChunk()) } return chunkCache } return chunkCache[:count] } - -func init() { - rand.Seed(time.Now().UnixNano()) -} - -func GenerateTestRandomChunk() chunk.Chunk { - data := make([]byte, chunk.DefaultSize) - rand.Read(data) - key := make([]byte, 32) - rand.Read(key) - return chunk.NewChunk(key, data) -} diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index c68f65fc0f..50430539bc 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -222,16 +222,22 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { return nil, err } - metaStore, err := fcdsleveldb.NewMetaStore(filepath.Join(path, "meta")) - if err != nil { - return nil, err - } if o.MockStore == nil { - db.data, err = fcds.NewStore(path, chunk.DefaultSize+8, metaStore, false) + metaStore, err := fcdsleveldb.NewMetaStore(filepath.Join(path, "meta")) + if err != nil { + return nil, err + } + db.data, err = fcds.NewStore( + filepath.Join(path, "data"), + chunk.DefaultSize+8, // chunk data has additional 8 bytes prepended + metaStore, + true, // enable offset cache + ) if err != nil { return nil, err } } else { + // Mock store is provided, use mock FCDS. db.data = fcdsmock.NewStore(o.MockStore) } // Index storing bin id, store and access timestamp for a particular address. 
From c8f362296ee735c380fe621f204881d8d2edbbcb Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Fri, 6 Dec 2019 15:45:52 +0100 Subject: [PATCH 03/23] storage/fcds: add doc.go --- storage/fcds/doc.go | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 storage/fcds/doc.go diff --git a/storage/fcds/doc.go b/storage/fcds/doc.go new file mode 100644 index 0000000000..e73cc4c091 --- /dev/null +++ b/storage/fcds/doc.go @@ -0,0 +1,43 @@ +// Copyright 2019 The Swarm Authors +// This file is part of the Swarm library. +// +// The Swarm library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The Swarm library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the Swarm library. If not, see . + +// Package fcds provides storage layers for storing chunk data only. +// +// FCDS stands for Fixed Chunk Data Storage. +// +// Swarm Chunk data limited size property allows a very specific chunk storage +// solution that can be more performant than more generalized key/value +// databases. FCDS stores chunk data in files (shards) at fixed length offsets. +// Relations between chunk address, file number and offset in that file are +// managed by a separate MetaStore implementation. +// +// Package fcds contains the main implementation based on simple file operations +// for persisting chunk data and relaying on specific chunk meta information +// storage. +// +// The reference chunk meta information storage is implemented in fcds/mem +// package. It can be used in tests. +// +// LevelDB based chunk meta information storage is implemented in fcds/leveldb +// package. This implementation should be used as default in production. +// +// Additional FCDS Store implementation is in fcds/mock. It uses mock store and +// can be used for centralized chunk storage options that mock storage package +// provides. +// +// Package fcds/test contains test functions which can be used to validate +// behaviour of different FCDS or its MetaStore implementations. 
+package fcds From ec14da36ee4301ce938d7fcc95836bf1e4ef101d Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Mon, 9 Dec 2019 14:04:37 +0100 Subject: [PATCH 04/23] cmd/swarm, storage/localstore: support breaking migrations --- cmd/swarm/main.go | 13 +- storage/localstore/localstore.go | 2 +- storage/localstore/migration.go | 62 ++++++-- storage/localstore/migration_test.go | 229 ++++++++++++++++++++++++--- storage/localstore/schema.go | 54 +++---- swarm.go | 4 +- 6 files changed, 294 insertions(+), 70 deletions(-) diff --git a/cmd/swarm/main.go b/cmd/swarm/main.go index 1d506e6067..e0e25803f5 100644 --- a/cmd/swarm/main.go +++ b/cmd/swarm/main.go @@ -19,6 +19,7 @@ package main import ( "crypto/ecdsa" "encoding/hex" + "errors" "fmt" "io/ioutil" "net" @@ -46,6 +47,7 @@ import ( "github.com/ethersphere/swarm/internal/debug" swarmmetrics "github.com/ethersphere/swarm/metrics" "github.com/ethersphere/swarm/network" + "github.com/ethersphere/swarm/storage/localstore" "github.com/ethersphere/swarm/storage/mock" mockrpc "github.com/ethersphere/swarm/storage/mock/rpc" "github.com/ethersphere/swarm/tracing" @@ -367,7 +369,16 @@ func registerBzzService(bzzconfig *bzzapi.Config, stack *node.Node) { // create a node store for this swarm key on global store nodeStore = globalStore.NewNodeStore(common.HexToAddress(bzzconfig.BzzKey)) } - return swarm.NewSwarm(bzzconfig, nodeStore) + s, err := swarm.NewSwarm(bzzconfig, nodeStore) + if err != nil { + var e *localstore.BreakingMigrationError + if errors.As(err, &e) { + fmt.Println(e.Manual) + utils.Fatalf("Manual storage migration required.") + } + return nil, err + } + return s, nil } //register within the ethereum node if err := stack.Register(boot); err != nil { diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 50430539bc..77804d6ed4 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -204,7 +204,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { } if schemaName == "" { // initial new localstore run - err := db.schemaName.Put(DbSchemaCurrent) + err := db.schemaName.Put(dbSchemaCurrent) if err != nil { return nil, err } diff --git a/storage/localstore/migration.go b/storage/localstore/migration.go index 318f593fda..d0c850244e 100644 --- a/storage/localstore/migration.go +++ b/storage/localstore/migration.go @@ -20,6 +20,7 @@ import ( "encoding/binary" "errors" "fmt" + "strings" "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/log" @@ -30,22 +31,40 @@ import ( var errMissingCurrentSchema = errors.New("could not find current db schema") var errMissingTargetSchema = errors.New("could not find target db schema") +// BreakingMigrationError is returned from migration functions that require +// manual migration steps. +type BreakingMigrationError struct { + Manual string +} + +// NewBreakingMigrationError returns a new BreakingMigrationError +// with instructions for manual operations. 
+func NewBreakingMigrationError(manual ...string) *BreakingMigrationError { + return &BreakingMigrationError{ + Manual: strings.Join(manual, "\n"), + } +} + +func (e *BreakingMigrationError) Error() string { + return "breaking migration" +} + type migration struct { - name string // name of the schema - fn func(db *DB) error // the migration function that needs to be performed in order to get to the current schema name + name string // name of the schema + fn func(db *DB) error // the migration function that needs to be performed in order to get to the current schema name + breaking bool } // schemaMigrations contains an ordered list of the database schemes, that is // in order to run data migrations in the correct sequence var schemaMigrations = []migration{ - {name: DbSchemaPurity, fn: func(db *DB) error { return nil }}, - {name: DbSchemaHalloween, fn: func(db *DB) error { return nil }}, - {name: DbSchemaSanctuary, fn: func(db *DB) error { return nil }}, - {name: DbSchemaDiwali, fn: migrateSanctuary}, + {name: dbSchemaSanctuary, fn: func(db *DB) error { return nil }}, + {name: dbSchemaDiwali, fn: migrateSanctuary}, + {name: dbSchemaForky, fn: migrateDiwali, breaking: true}, } func (db *DB) migrate(schemaName string) error { - migrations, err := getMigrations(schemaName, DbSchemaCurrent, schemaMigrations) + migrations, err := getMigrations(schemaName, dbSchemaCurrent, schemaMigrations) if err != nil { return fmt.Errorf("error getting migrations for current schema (%s): %v", schemaName, err) } @@ -84,23 +103,31 @@ type migrationFn func(db *DB) error func getMigrations(currentSchema, targetSchema string, allSchemeMigrations []migration) (migrations []migration, err error) { foundCurrent := false foundTarget := false - if currentSchema == DbSchemaCurrent { + if currentSchema == dbSchemaCurrent { return nil, nil } for i, v := range allSchemeMigrations { - switch v.name { - case currentSchema: + if v.name == targetSchema { + foundTarget = true + } + if v.name == currentSchema { if foundCurrent { return nil, errors.New("found schema name for the second time when looking for migrations") } foundCurrent = true - log.Info("found current localstore schema", "currentSchema", currentSchema, "migrateTo", DbSchemaCurrent, "total migrations", len(allSchemeMigrations)-i) + log.Info("found current localstore schema", "currentSchema", currentSchema, "migrateTo", dbSchemaCurrent, "total migrations", len(allSchemeMigrations)-i) continue // current schema migration should not be executed (already has been when schema was migrated to) - case targetSchema: - foundTarget = true } if foundCurrent { - migrations = append(migrations, v) + if v.breaking { + // discard all migrations before a breaking one + migrations = []migration{v} + } else { + migrations = append(migrations, v) + } + } + if foundTarget { + break } } if !foundCurrent { @@ -181,3 +208,10 @@ func migrateSanctuary(db *DB) error { return db.shed.WriteBatch(batch) } + +func migrateDiwali(db *DB) error { + return NewBreakingMigrationError( + "Swarm chunk storage layer is changed.", + "Please do a manual export to preserve chunk data.", + ) +} diff --git a/storage/localstore/migration_test.go b/storage/localstore/migration_test.go index 1c3ca97b4d..1b7e0d040d 100644 --- a/storage/localstore/migration_test.go +++ b/storage/localstore/migration_test.go @@ -23,6 +23,7 @@ import ( "math/rand" "os" "path" + "reflect" "strings" "testing" @@ -32,19 +33,19 @@ import ( func TestOneMigration(t *testing.T) { defer func(v []migration, s string) { schemaMigrations = 
v - DbSchemaCurrent = s - }(schemaMigrations, DbSchemaCurrent) + dbSchemaCurrent = s + }(schemaMigrations, dbSchemaCurrent) - DbSchemaCurrent = DbSchemaSanctuary + dbSchemaCurrent = dbSchemaSanctuary ran := false shouldNotRun := false schemaMigrations = []migration{ - {name: DbSchemaSanctuary, fn: func(db *DB) error { + {name: dbSchemaSanctuary, fn: func(db *DB) error { shouldNotRun = true // this should not be executed return nil }}, - {name: DbSchemaDiwali, fn: func(db *DB) error { + {name: dbSchemaDiwali, fn: func(db *DB) error { ran = true return nil }}, @@ -71,7 +72,7 @@ func TestOneMigration(t *testing.T) { t.Fatal(err) } - DbSchemaCurrent = DbSchemaDiwali + dbSchemaCurrent = dbSchemaDiwali // start the existing localstore and expect the migration to run db, err = New(dir, baseKey, nil) @@ -84,8 +85,8 @@ func TestOneMigration(t *testing.T) { t.Fatal(err) } - if schemaName != DbSchemaDiwali { - t.Errorf("schema name mismatch. got '%s', want '%s'", schemaName, DbSchemaDiwali) + if schemaName != dbSchemaDiwali { + t.Errorf("schema name mismatch. got '%s', want '%s'", schemaName, dbSchemaDiwali) } if !ran { @@ -105,20 +106,20 @@ func TestOneMigration(t *testing.T) { func TestManyMigrations(t *testing.T) { defer func(v []migration, s string) { schemaMigrations = v - DbSchemaCurrent = s - }(schemaMigrations, DbSchemaCurrent) + dbSchemaCurrent = s + }(schemaMigrations, dbSchemaCurrent) - DbSchemaCurrent = DbSchemaSanctuary + dbSchemaCurrent = dbSchemaSanctuary shouldNotRun := false executionOrder := []int{-1, -1, -1, -1} schemaMigrations = []migration{ - {name: DbSchemaSanctuary, fn: func(db *DB) error { + {name: dbSchemaSanctuary, fn: func(db *DB) error { shouldNotRun = true // this should not be executed return nil }}, - {name: DbSchemaDiwali, fn: func(db *DB) error { + {name: dbSchemaDiwali, fn: func(db *DB) error { executionOrder[0] = 0 return nil }}, @@ -157,7 +158,7 @@ func TestManyMigrations(t *testing.T) { t.Fatal(err) } - DbSchemaCurrent = "salvation" + dbSchemaCurrent = "salvation" // start the existing localstore and expect the migration to run db, err = New(dir, baseKey, nil) @@ -190,14 +191,194 @@ func TestManyMigrations(t *testing.T) { } } +// TestGetMigrations validates the migration selection based on +// current and target schema names. 
+func TestGetMigrations(t *testing.T) { + currentSchema := "current" + defaultTargetSchema := "target" + + for _, tc := range []struct { + name string + targetSchema string + migrations []migration + wantMigrations []migration + }{ + { + name: "empty", + targetSchema: "current", + migrations: []migration{ + {name: "current"}, + }, + }, + { + name: "single", + migrations: []migration{ + {name: "current"}, + {name: "target"}, + }, + wantMigrations: []migration{ + {name: "target"}, + }, + }, + { + name: "multiple", + migrations: []migration{ + {name: "current"}, + {name: "middle"}, + {name: "target"}, + }, + wantMigrations: []migration{ + {name: "middle"}, + {name: "target"}, + }, + }, + { + name: "between", + migrations: []migration{ + {name: "current"}, + {name: "target"}, + {name: "future"}, + }, + wantMigrations: []migration{ + {name: "target"}, + }, + }, + { + name: "between multiple", + migrations: []migration{ + {name: "current"}, + {name: "middle"}, + {name: "target"}, + {name: "future"}, + }, + wantMigrations: []migration{ + {name: "middle"}, + {name: "target"}, + }, + }, + { + name: "with previous", + migrations: []migration{ + {name: "previous"}, + {name: "current"}, + {name: "target"}, + }, + wantMigrations: []migration{ + {name: "target"}, + }, + }, + { + name: "with previous multiple", + migrations: []migration{ + {name: "previous"}, + {name: "current"}, + {name: "middle"}, + {name: "target"}, + }, + wantMigrations: []migration{ + {name: "middle"}, + {name: "target"}, + }, + }, + { + name: "breaking", + migrations: []migration{ + {name: "current"}, + {name: "target", breaking: true}, + }, + wantMigrations: []migration{ + {name: "target", breaking: true}, + }, + }, + { + name: "breaking multiple", + migrations: []migration{ + {name: "current"}, + {name: "middle"}, + {name: "breaking", breaking: true}, + {name: "target"}, + }, + wantMigrations: []migration{ + {name: "breaking", breaking: true}, + {name: "target"}, + }, + }, + { + name: "breaking with previous", + migrations: []migration{ + {name: "previous"}, + {name: "current"}, + {name: "target", breaking: true}, + }, + wantMigrations: []migration{ + {name: "target", breaking: true}, + }, + }, + { + name: "breaking multiple breaks", + migrations: []migration{ + {name: "current"}, + {name: "middle", breaking: true}, + {name: "target", breaking: true}, + }, + wantMigrations: []migration{ + {name: "target", breaking: true}, + }, + }, + { + name: "breaking multiple with middle", + migrations: []migration{ + {name: "current"}, + {name: "breaking", breaking: true}, + {name: "middle"}, + {name: "target", breaking: true}, + }, + wantMigrations: []migration{ + {name: "target", breaking: true}, + }, + }, + { + name: "breaking multiple between", + migrations: []migration{ + {name: "current"}, + {name: "breaking", breaking: true}, + {name: "middle"}, + {name: "target", breaking: true}, + {name: "future"}, + }, + wantMigrations: []migration{ + {name: "target", breaking: true}, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + targetSchema := tc.targetSchema + if targetSchema == "" { + targetSchema = defaultTargetSchema + } + got, err := getMigrations( + currentSchema, + targetSchema, + tc.migrations, + ) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, tc.wantMigrations) { + t.Errorf("got migrations %v, want %v", got, tc.wantMigrations) + } + }) + } +} + // TestMigrationFailFrom checks that local store boot should fail when the schema we're migrating from cannot be found func TestMigrationFailFrom(t *testing.T) { defer 
func(v []migration, s string) { schemaMigrations = v - DbSchemaCurrent = s - }(schemaMigrations, DbSchemaCurrent) + dbSchemaCurrent = s + }(schemaMigrations, dbSchemaCurrent) - DbSchemaCurrent = "koo-koo-schema" + dbSchemaCurrent = "koo-koo-schema" shouldNotRun := false schemaMigrations = []migration{ @@ -236,7 +417,7 @@ func TestMigrationFailFrom(t *testing.T) { t.Fatal(err) } - DbSchemaCurrent = "foo" + dbSchemaCurrent = "foo" // start the existing localstore and expect the migration to run db, err = New(dir, baseKey, nil) @@ -253,10 +434,10 @@ func TestMigrationFailFrom(t *testing.T) { func TestMigrationFailTo(t *testing.T) { defer func(v []migration, s string) { schemaMigrations = v - DbSchemaCurrent = s - }(schemaMigrations, DbSchemaCurrent) + dbSchemaCurrent = s + }(schemaMigrations, dbSchemaCurrent) - DbSchemaCurrent = "langur" + dbSchemaCurrent = "langur" shouldNotRun := false schemaMigrations = []migration{ @@ -295,7 +476,7 @@ func TestMigrationFailTo(t *testing.T) { t.Fatal(err) } - DbSchemaCurrent = "foo" + dbSchemaCurrent = "foo" // start the existing localstore and expect the migration to run db, err = New(dir, baseKey, nil) @@ -350,8 +531,8 @@ func TestMigrateSanctuaryFixture(t *testing.T) { t.Fatal(err) } - if schemaName != DbSchemaCurrent { - t.Fatalf("schema name mismatch, want '%s' got '%s'", DbSchemaCurrent, schemaName) + if schemaName != dbSchemaCurrent { + t.Fatalf("schema name mismatch, want '%s' got '%s'", dbSchemaCurrent, schemaName) } err = db.Close() diff --git a/storage/localstore/schema.go b/storage/localstore/schema.go index 512869f3bf..ba0e956096 100644 --- a/storage/localstore/schema.go +++ b/storage/localstore/schema.go @@ -22,50 +22,50 @@ import ( "github.com/syndtr/goleveldb/leveldb/opt" ) -// The DB schema we want to use. The actual/current DB schema might differ -// until migrations are run. -var DbSchemaCurrent = DbSchemaDiwali +// dbSchemaCurrent is the schema name of the current implementation. +// The actual/current DB schema might differ until migrations are run. +var dbSchemaCurrent = dbSchemaForky -// There was a time when we had no schema at all. -const DbSchemaNone = "" - -// "purity" is the first formal schema of LevelDB we release together with Swarm 0.3.5 -const DbSchemaPurity = "purity" - -// "halloween" is here because we had a screw in the garbage collector index. -// Because of that we had to rebuild the GC index to get rid of erroneous -// entries and that takes a long time. This schema is used for bookkeeping, -// so rebuild index will run just once. -const DbSchemaHalloween = "halloween" +const ( + // dbSchemaSanctuary is the first storage/localstore schema. + dbSchemaSanctuary = "sanctuary" + // dbSchemaDiwali migration simply renames the pullIndex in localstore. + dbSchemaDiwali = "diwali" + // dbSchemaForky migration implements FCDS storage and requires manual import and export. + dbSchemaForky = "forky" +) -const DbSchemaSanctuary = "sanctuary" +// IsLegacyDatabase returns true if legacy database is in the data directory. +func IsLegacyDatabase(datadir string) bool { -// the "diwali" migration simply renames the pullIndex in localstore -const DbSchemaDiwali = "diwali" + // "purity" is the first formal schema of LevelDB we release together with Swarm 0.3.5 + const dbSchemaPurity = "purity" -// returns true if legacy database is in the datadir -func IsLegacyDatabase(datadir string) bool { + // "halloween" is here because we had a screw in the garbage collector index. 
+ // Because of that we had to rebuild the GC index to get rid of erroneous + // entries and that takes a long time. This schema is used for bookkeeping, + // so rebuild index will run just once. + const dbSchemaHalloween = "halloween" - var ( - legacyDbSchemaKey = []byte{8} - ) + var legacyDBSchemaKey = []byte{8} db, err := leveldb.OpenFile(datadir, &opt.Options{OpenFilesCacheCapacity: 128}) if err != nil { - log.Error("got an error while trying to open leveldb path", "path", datadir, "err", err) + log.Error("open leveldb", "path", datadir, "err", err) return false } defer db.Close() - data, err := db.Get(legacyDbSchemaKey, nil) + data, err := db.Get(legacyDBSchemaKey, nil) if err != nil { if err == leveldb.ErrNotFound { // if we haven't found anything under the legacy db schema key- we are not on legacy return false } - log.Error("got an unexpected error fetching legacy name from the database", "err", err) + log.Error("get legacy name from", "err", err) } - log.Trace("checking if database scheme is legacy", "schema name", string(data)) - return string(data) == DbSchemaHalloween || string(data) == DbSchemaPurity + schema := string(data) + log.Trace("checking if database scheme is legacy", "schema name", schema) + return schema == dbSchemaHalloween || schema == dbSchemaPurity } diff --git a/swarm.go b/swarm.go index 7c2b04a4bc..e798a843dc 100644 --- a/swarm.go +++ b/swarm.go @@ -198,9 +198,7 @@ func NewSwarm(config *api.Config, mockStore *mock.NodeStore) (self *Swarm, err e // check that we are not in the old database schema // if so - fail and exit - isLegacy := localstore.IsLegacyDatabase(config.ChunkDbPath) - - if isLegacy { + if localstore.IsLegacyDatabase(config.ChunkDbPath) { return nil, errors.New("Legacy database format detected! Please read the migration announcement at: https://github.com/ethersphere/swarm/blob/master/docs/Migration-v0.3-to-v0.4.md") } From e352b882154f2eb84a8472b90d61dae0a6a85ac3 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Mon, 9 Dec 2019 16:45:20 +0100 Subject: [PATCH 05/23] cmd/swarm, storage/localstore: improve migrations, export and import --- cmd/swarm/db.go | 5 ---- storage/localstore/export.go | 29 ++++++++++++++------ storage/localstore/localstore.go | 38 +++++++++++++++++++++----- storage/localstore/migration.go | 40 ++++++++++++++++++++++------ storage/localstore/migration_test.go | 28 ++++++++++++++----- swarm.go | 3 +++ 6 files changed, 109 insertions(+), 34 deletions(-) diff --git a/cmd/swarm/db.go b/cmd/swarm/db.go index 856d3e1d8a..b3d5a7764b 100644 --- a/cmd/swarm/db.go +++ b/cmd/swarm/db.go @@ -24,7 +24,6 @@ import ( "fmt" "io" "os" - "path/filepath" "github.com/ethereum/go-ethereum/cmd/utils" "github.com/ethereum/go-ethereum/common" @@ -168,10 +167,6 @@ func dbImport(ctx *cli.Context) { } func openLDBStore(path string, basekey []byte) (*localstore.DB, error) { - if _, err := os.Stat(filepath.Join(path, "CURRENT")); err != nil { - return nil, fmt.Errorf("invalid chunkdb path: %s", err) - } - return localstore.New(path, basekey, nil) } diff --git a/storage/localstore/export.go b/storage/localstore/export.go index ccb6287860..29ae6c1b53 100644 --- a/storage/localstore/export.go +++ b/storage/localstore/export.go @@ -27,6 +27,7 @@ import ( "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/shed" ) const ( @@ -57,24 +58,36 @@ func (db *DB) Export(w io.Writer) (count int64, err error) { return 0, err } - err = db.data.Iterate(func(ch chunk.Chunk) (stop bool, err error) { - - data := ch.Data() 
- + exportchunk := func(addr chunk.Address, data []byte) error { hdr := &tar.Header{ - Name: hex.EncodeToString(ch.Address()), + Name: hex.EncodeToString(addr), Mode: 0644, Size: int64(len(data)), } if err := tw.WriteHeader(hdr); err != nil { - return false, err + return err } if _, err := tw.Write(data); err != nil { - return false, err + return err } count++ - return false, nil + return nil + } + + // Export legacy (pre fcds) data index. + // This is required as a manual step in migrateDiwali migration. + err = db.retrievalDataIndex.Iterate(func(item shed.Item) (stop bool, err error) { + err = exportchunk(item.Address, item.Data) + return false, err + }, nil) + if err != nil { + return 0, err + } + + err = db.data.Iterate(func(ch chunk.Chunk) (stop bool, err error) { + err = exportchunk(ch.Address(), ch.Data()) + return false, err }) return count, err diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 77804d6ed4..313778bc24 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -62,6 +62,8 @@ type DB struct { shed *shed.DB tags *chunk.Tags + path string + // schema name of loaded data schemaName shed.StringField @@ -69,6 +71,8 @@ type DB struct { data fcds.Interface // bin index and timestamps index metaIndex shed.Index + // legacy data index, used only in export for manual migration + retrievalDataIndex shed.Index // push syncing index pushIndex shed.Index // push syncing subscriptions triggers @@ -169,6 +173,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { } db = &DB{ + path: path, capacity: o.Capacity, baseKey: baseKey, tags: o.Tags, @@ -208,12 +213,6 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { if err != nil { return nil, err } - } else { - // execute possible migrations - err = db.migrate(schemaName) - if err != nil { - return nil, err - } } // Persist gc size. @@ -267,6 +266,33 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { if err != nil { return nil, err } + // Index storing actual chunk address, data and bin id. + // Used only in export to provide migration functionality. + db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|BinID|Data", shed.IndexFuncs{ + EncodeKey: func(fields shed.Item) (key []byte, err error) { + return fields.Address, nil + }, + DecodeKey: func(key []byte) (e shed.Item, err error) { + e.Address = key + return e, nil + }, + EncodeValue: func(fields shed.Item) (value []byte, err error) { + b := make([]byte, 16) + binary.BigEndian.PutUint64(b[:8], fields.BinID) + binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp)) + value = append(b, fields.Data...) 
+ return value, nil + }, + DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) { + e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[8:16])) + e.BinID = binary.BigEndian.Uint64(value[:8]) + e.Data = value[16:] + return e, nil + }, + }) + if err != nil { + return nil, err + } // pull index allows history and live syncing per po bin db.pullIndex, err = db.shed.NewIndex("PO|BinID->Hash|Tag", shed.IndexFuncs{ EncodeKey: func(fields shed.Item) (key []byte, err error) { diff --git a/storage/localstore/migration.go b/storage/localstore/migration.go index d0c850244e..54b2340cff 100644 --- a/storage/localstore/migration.go +++ b/storage/localstore/migration.go @@ -20,7 +20,6 @@ import ( "encoding/binary" "errors" "fmt" - "strings" "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/log" @@ -39,9 +38,9 @@ type BreakingMigrationError struct { // NewBreakingMigrationError returns a new BreakingMigrationError // with instructions for manual operations. -func NewBreakingMigrationError(manual ...string) *BreakingMigrationError { +func NewBreakingMigrationError(manual string) *BreakingMigrationError { return &BreakingMigrationError{ - Manual: strings.Join(manual, "\n"), + Manual: manual, } } @@ -49,6 +48,18 @@ func (e *BreakingMigrationError) Error() string { return "breaking migration" } +func (db *DB) Migrate() (err error) { + schemaName, err := db.schemaName.Get() + if err != nil { + return err + } + if schemaName == "" { + return nil + } + // execute possible migrations + return db.migrate(schemaName) +} + type migration struct { name string // name of the schema fn func(db *DB) error // the migration function that needs to be performed in order to get to the current schema name @@ -66,7 +77,7 @@ var schemaMigrations = []migration{ func (db *DB) migrate(schemaName string) error { migrations, err := getMigrations(schemaName, dbSchemaCurrent, schemaMigrations) if err != nil { - return fmt.Errorf("error getting migrations for current schema (%s): %v", schemaName, err) + return fmt.Errorf("get migrations for current schema %s: %w", schemaName, err) } // no migrations to run @@ -210,8 +221,21 @@ func migrateSanctuary(db *DB) error { } func migrateDiwali(db *DB) error { - return NewBreakingMigrationError( - "Swarm chunk storage layer is changed.", - "Please do a manual export to preserve chunk data.", - ) + return NewBreakingMigrationError(fmt.Sprintf(` +Swarm chunk storage layer is changed. + +You can choose if you want to do a manual migration or to discard current data. + +Preserving data requires additional storage roughly the size of the data directory and may take longer time depending on storage performance. + +To continue by discarding data, just remove %[1]s directory and start the swarm binary again. 
+ +To preserve data: + - export data + swarm db export %[1]s data.tar %[2]x + - remove data directory %[1]s + - import data + swarm db import %[1]s data.tar %[2]x + - start the swarm +`, db.path, db.baseKey)) } diff --git a/storage/localstore/migration_test.go b/storage/localstore/migration_test.go index 1b7e0d040d..b87ebef0af 100644 --- a/storage/localstore/migration_test.go +++ b/storage/localstore/migration_test.go @@ -17,6 +17,7 @@ package localstore import ( + "errors" "io" "io/ioutil" "log" @@ -24,7 +25,6 @@ import ( "os" "path" "reflect" - "strings" "testing" "github.com/ethersphere/swarm/chunk" @@ -79,6 +79,9 @@ func TestOneMigration(t *testing.T) { if err != nil { t.Fatal(err) } + if err := db.Migrate(); err != nil { + t.Fatal(err) + } schemaName, err := db.schemaName.Get() if err != nil { @@ -165,6 +168,9 @@ func TestManyMigrations(t *testing.T) { if err != nil { t.Fatal(err) } + if err := db.Migrate(); err != nil { + t.Fatal(err) + } schemaName, err := db.schemaName.Get() if err != nil { @@ -421,8 +427,12 @@ func TestMigrationFailFrom(t *testing.T) { // start the existing localstore and expect the migration to run db, err = New(dir, baseKey, nil) - if !strings.Contains(err.Error(), errMissingCurrentSchema.Error()) { - t.Fatalf("expected errCannotFindSchema but got %v", err) + if err != nil { + t.Fatal(err) + } + + if err := db.Migrate(); !errors.Is(err, errMissingCurrentSchema) { + t.Fatalf("got error %v, want %v", err, errMissingCurrentSchema) } if shouldNotRun { @@ -480,8 +490,12 @@ func TestMigrationFailTo(t *testing.T) { // start the existing localstore and expect the migration to run db, err = New(dir, baseKey, nil) - if !strings.Contains(err.Error(), errMissingTargetSchema.Error()) { - t.Fatalf("expected errMissingTargetSchema but got %v", err) + if err != nil { + t.Fatal(err) + } + + if err := db.Migrate(); !errors.Is(err, errMissingTargetSchema) { + t.Fatalf("got error %v, want %v", err, errMissingTargetSchema) } if shouldNotRun { @@ -531,8 +545,8 @@ func TestMigrateSanctuaryFixture(t *testing.T) { t.Fatal(err) } - if schemaName != dbSchemaCurrent { - t.Fatalf("schema name mismatch, want '%s' got '%s'", dbSchemaCurrent, schemaName) + if schemaName != dbSchemaSanctuary { + t.Fatalf("schema name mismatch, want '%s' got '%s'", dbSchemaSanctuary, schemaName) } err = db.Close() diff --git a/swarm.go b/swarm.go index e798a843dc..f9fa87d11b 100644 --- a/swarm.go +++ b/swarm.go @@ -232,6 +232,9 @@ func NewSwarm(config *api.Config, mockStore *mock.NodeStore) (self *Swarm, err e if err != nil { return nil, err } + if err := localStore.Migrate(); err != nil { + return nil, err + } lstore := chunk.NewValidatorStore( localStore, storage.NewContentAddressValidator(storage.MakeHashFunc(storage.DefaultHash)), From c8c16e1e61daf201ef5ebc1f8f29c278e2e79375 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Mon, 9 Dec 2019 18:39:22 +0100 Subject: [PATCH 06/23] storage/localstore: export pins --- storage/localstore/export.go | 120 ++++++++++++++++++++++++++++-- storage/localstore/export_test.go | 47 +++++++++++- storage/localstore/migration.go | 3 + storage/localstore/mode_set.go | 8 +- 4 files changed, 165 insertions(+), 13 deletions(-) diff --git a/storage/localstore/export.go b/storage/localstore/export.go index 29ae6c1b53..c758e616e8 100644 --- a/storage/localstore/export.go +++ b/storage/localstore/export.go @@ -18,16 +18,20 @@ package localstore import ( "archive/tar" + "bufio" "context" + "encoding/binary" "encoding/hex" "fmt" "io" "io/ioutil" + "strings" "sync" 
"github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/shed" + "github.com/syndtr/goleveldb/leveldb" ) const ( @@ -38,6 +42,9 @@ const ( legacyExportVersion = "1" // current export format version currentExportVersion = "2" + // tags + tagsFilenamePrefix = "tags-" + exportTagsFileLimit = 1000 ) // Export writes a tar structured data to the writer of @@ -58,6 +65,50 @@ func (db *DB) Export(w io.Writer) (count int64, err error) { return 0, err } + // tags export + var ( + tagsFileNumber int + tagsCounter int + tags []byte + + writeTags = func() (err error) { + l := len(tags) + if l == 0 { + return nil + } + + tagsCounter = 0 + tagsFileNumber++ + + if err := tw.WriteHeader(&tar.Header{ + Name: fmt.Sprintf("%s%v", tagsFilenamePrefix, tagsFileNumber), + Mode: 0644, + Size: int64(l), + }); err != nil { + return err + } + + _, err = tw.Write(tags) + return err + } + ) + err = db.pinIndex.Iterate(func(item shed.Item) (stop bool, err error) { + tags = append(tags, encodeExportPin(item.Address, item.PinCounter)...) + tags = append(tags, '\n') + if tagsCounter == exportTagsFileLimit { + if err := writeTags(); err != nil { + return true, err + } + } + return false, nil + }, nil) + if err != nil { + return 0, err + } + if err := writeTags(); err != nil { + return 0, err + } + exportchunk := func(addr chunk.Address, data []byte) error { hdr := &tar.Header{ Name: hex.EncodeToString(addr), @@ -108,7 +159,6 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { var wg sync.WaitGroup go func() { var ( - firstFile = true // if exportVersionFilename file is not present // assume legacy version version = legacyExportVersion @@ -124,19 +174,57 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { case <-ctx.Done(): } } - if firstFile { - firstFile = false - if hdr.Name == exportVersionFilename { - data, err := ioutil.ReadAll(tr) - if err != nil { + // get the export file format version + if hdr.Name == exportVersionFilename { + data, err := ioutil.ReadAll(tr) + if err != nil { + select { + case errC <- err: + case <-ctx.Done(): + } + } + version = string(data) + continue + } + // set pinned chunks + if strings.HasPrefix(hdr.Name, tagsFilenamePrefix) { + // All chunks are put before tag files are iterated on + // because of tagsFilenamePrefix starts with "t" + // which is ordered later then hex characters of chunk + // addresses. + // + // Wait for chunks to be stored before continuing. + wg.Wait() + + scanner := bufio.NewScanner(tr) + batch := new(leveldb.Batch) + for scanner.Scan() { + addr, counter := decodeExportPin(scanner.Bytes()) + if addr == nil { + continue + } + if err := db.setPin(batch, addr, counter); err != nil { select { case errC <- err: case <-ctx.Done(): } } - version = string(data) - continue } + + if err := scanner.Err(); err != nil { + select { + case errC <- err: + case <-ctx.Done(): + } + } + + if err := db.shed.WriteBatch(batch); err != nil { + select { + case errC <- err: + case <-ctx.Done(): + } + } + continue } if len(hdr.Name) != 64 { @@ -218,3 +306,19 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { } } } + +func encodeExportPin(addr chunk.Address, counter uint64) (data []byte) { + data = make([]byte, 8, 8+len(addr)) + binary.BigEndian.PutUint64(data[:8], counter) + data = append(data, addr...) 
+ return data +} + +func decodeExportPin(data []byte) (addr chunk.Address, counter uint64) { + if len(data) < 8 { + return nil, 0 + } + counter = binary.BigEndian.Uint64(data[:8]) + addr = chunk.Address(data[8:]) + return addr, counter +} diff --git a/storage/localstore/export_test.go b/storage/localstore/export_test.go index 3681159673..dc2d63ea8f 100644 --- a/storage/localstore/export_test.go +++ b/storage/localstore/export_test.go @@ -19,9 +19,12 @@ package localstore import ( "bytes" "context" + "errors" + "fmt" "testing" "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/shed" ) // TestExportImport constructs two databases, one to put and export @@ -31,9 +34,13 @@ func TestExportImport(t *testing.T) { db1, cleanup1 := newTestDB(t, nil) defer cleanup1() - var chunkCount = 100 + var ( + chunkCount = exportTagsFileLimit * 3 + pinnedCount = exportTagsFileLimit*2 + 10 + ) chunks := make(map[string][]byte, chunkCount) + pinned := make(map[string]uint64, pinnedCount) for i := 0; i < chunkCount; i++ { ch := generateTestRandomChunk() @@ -42,8 +49,44 @@ func TestExportImport(t *testing.T) { t.Fatal(err) } chunks[string(ch.Address())] = ch.Data() + if i < pinnedCount { + count := uint64(i%84) + 1 // arbitrary pin counter + for i := uint64(0); i < count; i++ { + err := db1.Set(context.Background(), chunk.ModeSetPin, ch.Address()) + if err != nil { + t.Fatal(err) + } + } + pinned[string(ch.Address())] = count + } } + validtePins := func(t *testing.T, db *DB) { + t.Helper() + + var got int + err := db1.pinIndex.Iterate(func(item shed.Item) (stop bool, err error) { + count, ok := pinned[string(item.Address)] + if !ok { + return true, errors.New("chunk not pinned") + } + if count != item.PinCounter { + return true, fmt.Errorf("got pin count %v for chunk %x, want %v", item.PinCounter, item.Address, count) + } + got++ + return false, nil + }, nil) + if err != nil { + t.Fatal(err) + } + + if got != pinnedCount { + t.Fatalf("got pinned chunks %v, want %v", got, pinnedCount) + } + } + + validtePins(t, db1) + var buf bytes.Buffer c, err := db1.Export(&buf) @@ -77,4 +120,6 @@ func TestExportImport(t *testing.T) { t.Fatalf("chunk %s: got data %x, want %x", addr.Hex(), got, want) } } + + validtePins(t, db2) } diff --git a/storage/localstore/migration.go b/storage/localstore/migration.go index 54b2340cff..27455abf44 100644 --- a/storage/localstore/migration.go +++ b/storage/localstore/migration.go @@ -48,6 +48,9 @@ func (e *BreakingMigrationError) Error() string { return "breaking migration" } +// Migrate checks the schema name in storage dir and compares it +// with the expected schema name to construct a series of data migrations +// if they are required. func (db *DB) Migrate() (err error) { schemaName, err := db.schemaName.Get() if err != nil { diff --git a/storage/localstore/mode_set.go b/storage/localstore/mode_set.go index 950b01bd4e..2a786f64bd 100644 --- a/storage/localstore/mode_set.go +++ b/storage/localstore/mode_set.go @@ -101,7 +101,7 @@ func (db *DB) set(mode chunk.ModeSet, addrs ...chunk.Address) (err error) { case chunk.ModeSetPin: for _, addr := range addrs { - err := db.setPin(batch, addr) + err := db.setPin(batch, addr, 1) if err != nil { return err } @@ -326,7 +326,7 @@ func (db *DB) setRemove(batch *leveldb.Batch, addr chunk.Address) (gcSizeChange // setPin increments pin counter for the chunk by updating // pin index and sets the chunk to be excluded from garbage collection. // Provided batch is updated. 
-func (db *DB) setPin(batch *leveldb.Batch, addr chunk.Address) (err error) { +func (db *DB) setPin(batch *leveldb.Batch, addr chunk.Address, count uint64) (err error) { item := addressToItem(addr) // Get the existing pin counter of the chunk @@ -346,8 +346,8 @@ func (db *DB) setPin(batch *leveldb.Batch, addr chunk.Address) (err error) { existingPinCounter = pinnedChunk.PinCounter } - // Otherwise increase the existing counter by 1 - item.PinCounter = existingPinCounter + 1 + // Otherwise increase the existing counter by the pin count + item.PinCounter = existingPinCounter + count db.pinIndex.PutInBatch(batch, item) return nil From 7aed3b1be945689975f90ff3d37827a0ea795208 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Thu, 12 Dec 2019 19:10:10 +0100 Subject: [PATCH 07/23] storage/{fcds,localstore}: address Viktor's comments --- storage/fcds/fcds.go | 185 +++++++++++++++++-------------- storage/fcds/mock/mock.go | 4 +- storage/fcds/mock/mock_test.go | 2 +- storage/fcds/test/store.go | 2 +- storage/localstore/gc.go | 1 - storage/localstore/localstore.go | 4 +- 6 files changed, 106 insertions(+), 92 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index fe23efbca7..c49e8395b1 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -52,32 +52,30 @@ var ErrDBClosed = errors.New("closed database") // Store is the main FCDS implementation. It stores chunk data into // a number of files partitioned by the last byte of the chunk address. type Store struct { - shards map[uint8]*os.File // relations with shard id and a shard file - shardsMu map[uint8]*sync.Mutex // mutex for every shard file - meta MetaStore // stores chunk offsets - free map[uint8]struct{} // which shards have free offsets - freeMu sync.RWMutex // protects free field - freeCache *offsetCache // optional cache of free offset values - wg sync.WaitGroup // blocks Close until all other method calls are done - maxChunkSize int // maximal chunk data size - quit chan struct{} // quit disables all operations after Close is called - quitOnce sync.Once // protects close channel from multiple Close calls + shards []shard // relations with shard id and a shard file and their mutexes + meta MetaStore // stores chunk offsets + free []bool // which shards have free offsets + freeMu sync.RWMutex // protects free field + freeCache *offsetCache // optional cache of free offset values + wg sync.WaitGroup // blocks Close until all other method calls are done + maxChunkSize int // maximal chunk data size + quit chan struct{} // quit disables all operations after Close is called + quitOnce sync.Once // protects quit channel from multiple Close calls } -// NewStore constructs a new Store with files at path, with specified max chunk size. +// New constructs a new Store with files at path, with specified max chunk size. // Argument withCache enables in memory cache of free chunk data positions in files. 
-func NewStore(path string, maxChunkSize int, metaStore MetaStore, withCache bool) (s *Store, err error) { +func New(path string, maxChunkSize int, metaStore MetaStore, withCache bool) (s *Store, err error) { if err := os.MkdirAll(path, 0777); err != nil { return nil, err } - shards := make(map[byte]*os.File, shardCount) - shardsMu := make(map[uint8]*sync.Mutex) + shards := make([]shard, shardCount) for i := byte(0); i < shardCount; i++ { - shards[i], err = os.OpenFile(filepath.Join(path, fmt.Sprintf("chunks-%v.db", i)), os.O_CREATE|os.O_RDWR, 0666) + shards[i].f, err = os.OpenFile(filepath.Join(path, fmt.Sprintf("chunks-%v.db", i)), os.O_CREATE|os.O_RDWR, 0666) if err != nil { return nil, err } - shardsMu[i] = new(sync.Mutex) + shards[i].mu = new(sync.Mutex) } var freeCache *offsetCache if withCache { @@ -85,10 +83,9 @@ func NewStore(path string, maxChunkSize int, metaStore MetaStore, withCache bool } return &Store{ shards: shards, - shardsMu: shardsMu, meta: metaStore, freeCache: freeCache, - free: make(map[uint8]struct{}), + free: make([]bool, shardCount), maxChunkSize: maxChunkSize, quit: make(chan struct{}), }, nil @@ -102,16 +99,16 @@ func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { } defer done() - mu := s.shardsMu[getShard(addr)] - mu.Lock() - defer mu.Unlock() + sh := s.shards[getShard(addr)] + sh.mu.Lock() + defer sh.mu.Unlock() m, err := s.getMeta(addr) if err != nil { return nil, err } data := make([]byte, m.Size) - n, err := s.shards[getShard(addr)].ReadAt(data, m.Offset) + n, err := sh.f.ReadAt(data, m.Offset) if err != nil && err != io.EOF { return nil, err } @@ -129,7 +126,7 @@ func (s *Store) Has(addr chunk.Address) (yes bool, err error) { } defer done() - mu := s.shardsMu[getShard(addr)] + mu := s.shards[getShard(addr)].mu mu.Lock() defer mu.Unlock() @@ -152,67 +149,36 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { defer done() addr := ch.Address() - shard := getShard(addr) - f := s.shards[shard] data := ch.Data() + section := make([]byte, s.maxChunkSize) copy(section, data) - s.freeMu.RLock() - _, hasFree := s.free[shard] - s.freeMu.RUnlock() + shard := getShard(addr) + sh := s.shards[shard] - var offset int64 - var reclaimed bool - mu := s.shardsMu[shard] - mu.Lock() - if hasFree { - var freeOffset int64 = -1 - if s.freeCache != nil { - freeOffset = s.freeCache.get(shard) - } - if freeOffset < 0 { - freeOffset, err = s.meta.FreeOffset(shard) - if err != nil { - return err - } - } - if freeOffset < 0 { - offset, err = f.Seek(0, io.SeekEnd) - if err != nil { - mu.Unlock() - return err - } - s.freeMu.Lock() - delete(s.free, shard) - s.freeMu.Unlock() - } else { - offset, err = f.Seek(freeOffset, io.SeekStart) - if err != nil { - mu.Unlock() - return err - } - reclaimed = true - } + sh.mu.Lock() + defer sh.mu.Unlock() + + offset, reclaimed, err := s.getOffset(shard) + if err != nil { + return err + } + + if offset < 0 { + offset, err = sh.f.Seek(0, io.SeekEnd) } else { - offset, err = f.Seek(0, io.SeekEnd) - if err != nil { - mu.Unlock() - return err - } + _, err = sh.f.Seek(offset, io.SeekStart) } - _, err = f.Write(section) if err != nil { - mu.Unlock() return err } - if reclaimed { - if s.freeCache != nil { - s.freeCache.remove(shard, offset) - } - defer mu.Unlock() - } else { - mu.Unlock() + + if _, err = sh.f.Write(section); err != nil { + return err + } + if reclaimed && s.freeCache != nil { + s.freeCache.remove(shard, offset) } return s.meta.Set(addr, shard, reclaimed, &Meta{ Size: uint16(len(data)), @@ -220,6 +186,39 @@ func (s *Store) 
Put(ch chunk.Chunk) (err error) { }) } +// getOffset returns an offset where chunk data can be written to +// and a flag if the offset is reclaimed from a previously removed chunk. +// If offset is less then 0, no free offsets are available. +func (s *Store) getOffset(shard uint8) (offset int64, reclaimed bool, err error) { + if !s.shardHasFreeOffsets(shard) { + // shard does not have free offset + return -1, false, err + } + + offset = -1 // negative offset denotes no available free offset + if s.freeCache != nil { + // check if local cache has an offset + offset = s.freeCache.get(shard) + } + + if offset < 0 { + // free cache did not return a free offset, + // check the meta store for one + offset, err = s.meta.FreeOffset(shard) + if err != nil { + return 0, false, err + } + } + if offset < 0 { + // meta store did not return a free offset, + // mark this shard that has no free offsets + s.markShardWithFreeOffsets(shard, false) + return -1, false, nil + } + + return offset, true, nil +} + // Delete removes chunk data. func (s *Store) Delete(addr chunk.Address) (err error) { done, err := s.protect() @@ -229,11 +228,9 @@ func (s *Store) Delete(addr chunk.Address) (err error) { defer done() shard := getShard(addr) - s.freeMu.Lock() - s.free[shard] = struct{}{} - s.freeMu.Unlock() + s.markShardWithFreeOffsets(shard, true) - mu := s.shardsMu[shard] + mu := s.shards[shard].mu mu.Lock() defer mu.Unlock() @@ -260,18 +257,18 @@ func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) } defer done() - for _, mu := range s.shardsMu { - mu.Lock() + for _, sh := range s.shards { + sh.mu.Lock() } defer func() { - for _, mu := range s.shardsMu { - mu.Unlock() + for _, sh := range s.shards { + sh.mu.Unlock() } }() return s.meta.Iterate(func(addr chunk.Address, m *Meta) (stop bool, err error) { data := make([]byte, m.Size) - _, err = s.shards[getShard(addr)].ReadAt(data, m.Offset) + _, err = s.shards[getShard(addr)].f.ReadAt(data, m.Offset) if err != nil { return true, err } @@ -298,8 +295,8 @@ func (s *Store) Close() (err error) { case <-time.After(15 * time.Second): } - for _, f := range s.shards { - if err := f.Close(); err != nil { + for _, sh := range s.shards { + if err := sh.f.Close(); err != nil { return err } } @@ -327,7 +324,25 @@ func (s *Store) getMeta(addr chunk.Address) (m *Meta, err error) { return s.meta.Get(addr) } +func (s *Store) markShardWithFreeOffsets(shard uint8, has bool) { + s.freeMu.Lock() + s.free[shard] = has + s.freeMu.Unlock() +} + +func (s *Store) shardHasFreeOffsets(shard uint8) (has bool) { + s.freeMu.RLock() + has = s.free[shard] + s.freeMu.RUnlock() + return has +} + // getShard returns a shard number for the chunk address. func getShard(addr chunk.Address) (shard uint8) { return addr[len(addr)-1] % shardCount } + +type shard struct { + f *os.File + mu *sync.Mutex +} diff --git a/storage/fcds/mock/mock.go b/storage/fcds/mock/mock.go index d37c479169..21aeb45f1b 100644 --- a/storage/fcds/mock/mock.go +++ b/storage/fcds/mock/mock.go @@ -30,9 +30,9 @@ type Store struct { m *mock.NodeStore } -// NewStore returns a new store with mock NodeStore +// New returns a new store with mock NodeStore // for storing Chunk data. 
-func NewStore(m *mock.NodeStore) (s *Store) { +func New(m *mock.NodeStore) (s *Store) { return &Store{ m: m, } diff --git a/storage/fcds/mock/mock_test.go b/storage/fcds/mock/mock_test.go index 56bc02701e..1958d241f9 100644 --- a/storage/fcds/mock/mock_test.go +++ b/storage/fcds/mock/mock_test.go @@ -29,7 +29,7 @@ import ( // TestFCDS runs a standard series of tests on mock Store implementation. func TestFCDS(t *testing.T) { test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { - return mock.NewStore( + return mock.New( mem.NewGlobalStore().NewNodeStore( common.BytesToAddress(make([]byte, 20)), ), diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index 1f663bf6aa..e8c56e8dd6 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -297,7 +297,7 @@ func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds. t.Fatal(err) } - s, err = fcds.NewStore(path, chunk.DefaultSize, metaStore, !*noCacheFlag) + s, err = fcds.New(path, chunk.DefaultSize, metaStore, !*noCacheFlag) if err != nil { os.RemoveAll(path) t.Fatal(err) diff --git a/storage/localstore/gc.go b/storage/localstore/gc.go index 8c7379440f..7e46437e62 100644 --- a/storage/localstore/gc.go +++ b/storage/localstore/gc.go @@ -120,7 +120,6 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) { metrics.GetOrRegisterGauge(metricName+".accessts", nil).Update(item.AccessTimestamp) // delete from retrieve, pull, gc - //db.retrievalDataIndex.DeleteInBatch(batch, item) addrs = append(addrs, item.Address) db.metaIndex.DeleteInBatch(batch, item) db.pullIndex.DeleteInBatch(batch, item) diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 3d80da4dc5..79c296057b 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -226,7 +226,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { if err != nil { return nil, err } - db.data, err = fcds.NewStore( + db.data, err = fcds.New( filepath.Join(path, "data"), chunk.DefaultSize+8, // chunk data has additional 8 bytes prepended metaStore, @@ -237,7 +237,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { } } else { // Mock store is provided, use mock FCDS. - db.data = fcdsmock.NewStore(o.MockStore) + db.data = fcdsmock.New(o.MockStore) } // Index storing bin id, store and access timestamp for a particular address. // It is needed in order to update gc index keys for iteration order. 
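Aside (not part of the patch series): a minimal standalone sketch of the shard selection that the fcds.Store above relies on. Only the shard count and the "last address byte modulo shardCount" rule are taken from getShard in fcds.go; everything else here is illustrative.

package main

import "fmt"

// shardCount mirrors the fixed number of shard files in fcds.go.
const shardCount = 32

// getShard follows the rule from fcds.go: the last byte of the
// chunk address, modulo the number of shards.
func getShard(addr []byte) uint8 {
	return addr[len(addr)-1] % shardCount
}

func main() {
	addr := make([]byte, 32)
	addr[31] = 0x21 // last byte is 33
	fmt.Println(getShard(addr)) // 33 % 32 = 1
}

Because the shard is a pure function of the address, Get, Has, Put and Delete for the same chunk always contend on the same shard mutex, which is what makes the per-shard locking in the store sound.
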
From 26bcb48cd2d49c22e8100a84b530a5a5da863345 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Fri, 13 Dec 2019 12:49:55 +0100 Subject: [PATCH 08/23] storage/fcds: correctly return explicit nil in getOffset --- storage/fcds/fcds.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index c49e8395b1..43991e7008 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -192,7 +192,7 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { func (s *Store) getOffset(shard uint8) (offset int64, reclaimed bool, err error) { if !s.shardHasFreeOffsets(shard) { // shard does not have free offset - return -1, false, err + return -1, false, nil } offset = -1 // negative offset denotes no available free offset From 1e946806bbf03db246aaf6ef24d83ddd1119e2d5 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Fri, 13 Dec 2019 15:41:26 +0100 Subject: [PATCH 09/23] storage/fcds: add WithCache optional argument to New constructor --- storage/fcds/fcds.go | 46 ++++++++++++++++++++------------ storage/fcds/test/store.go | 2 +- storage/localstore/localstore.go | 2 +- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 43991e7008..d1c839a2a6 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -63,32 +63,44 @@ type Store struct { quitOnce sync.Once // protects quit channel from multiple Close calls } +// Option is an optional argument passed to New. +type Option func(*Store) + +// WithCache is an optional argument to New constructor that enables +// in memory cache of free chunk data positions in files +func WithCache(yes bool) Option { + return func(s *Store) { + if yes { + s.freeCache = newOffsetCache(shardCount) + } else { + s.freeCache = nil + } + } +} + // New constructs a new Store with files at path, with specified max chunk size. -// Argument withCache enables in memory cache of free chunk data positions in files. -func New(path string, maxChunkSize int, metaStore MetaStore, withCache bool) (s *Store, err error) { +func New(path string, maxChunkSize int, metaStore MetaStore, opts ...Option) (s *Store, err error) { + s = &Store{ + shards: make([]shard, shardCount), + meta: metaStore, + free: make([]bool, shardCount), + maxChunkSize: maxChunkSize, + quit: make(chan struct{}), + } + for _, o := range opts { + o(s) + } if err := os.MkdirAll(path, 0777); err != nil { return nil, err } - shards := make([]shard, shardCount) for i := byte(0); i < shardCount; i++ { - shards[i].f, err = os.OpenFile(filepath.Join(path, fmt.Sprintf("chunks-%v.db", i)), os.O_CREATE|os.O_RDWR, 0666) + s.shards[i].f, err = os.OpenFile(filepath.Join(path, fmt.Sprintf("chunks-%v.db", i)), os.O_CREATE|os.O_RDWR, 0666) if err != nil { return nil, err } - shards[i].mu = new(sync.Mutex) + s.shards[i].mu = new(sync.Mutex) } - var freeCache *offsetCache - if withCache { - freeCache = newOffsetCache(shardCount) - } - return &Store{ - shards: shards, - meta: metaStore, - freeCache: freeCache, - free: make([]bool, shardCount), - maxChunkSize: maxChunkSize, - quit: make(chan struct{}), - }, nil + return s, nil } // Get returns a chunk with data. diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index e8c56e8dd6..f7dcfa9c69 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -297,7 +297,7 @@ func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds. 
t.Fatal(err) } - s, err = fcds.New(path, chunk.DefaultSize, metaStore, !*noCacheFlag) + s, err = fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag)) if err != nil { os.RemoveAll(path) t.Fatal(err) diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 79c296057b..35631c8524 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -230,7 +230,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { filepath.Join(path, "data"), chunk.DefaultSize+8, // chunk data has additional 8 bytes prepended metaStore, - true, // enable offset cache + fcds.WithCache(true), ) if err != nil { return nil, err From db658c776854e3bfb4796d02820a4e5e625fcb41 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Wed, 18 Dec 2019 13:02:47 +0100 Subject: [PATCH 10/23] storage/fcds: address most of Petar's comments --- storage/fcds/fcds.go | 60 +++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index d1c839a2a6..94227f8536 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -25,6 +25,8 @@ import ( "sync" "time" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/chunk" ) @@ -46,8 +48,8 @@ var _ Interface = new(Store) // Number of files that store chunk data. const shardCount = 32 -// ErrDBClosed is returned if database is already closed. -var ErrDBClosed = errors.New("closed database") +// ErrStoreClosed is returned if store is already closed. +var ErrStoreClosed = errors.New("closed store") // Store is the main FCDS implementation. It stores chunk data into // a number of files partitioned by the last byte of the chunk address. @@ -105,11 +107,10 @@ func New(path string, maxChunkSize int, metaStore MetaStore, opts ...Option) (s // Get returns a chunk with data. func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { - done, err := s.protect() - if err != nil { + if err := s.protect(); err != nil { return nil, err } - defer done() + defer s.unprotect() sh := s.shards[getShard(addr)] sh.mu.Lock() @@ -132,11 +133,10 @@ func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { // Has returns true if chunk is stored. func (s *Store) Has(addr chunk.Address) (yes bool, err error) { - done, err := s.protect() - if err != nil { + if err := s.protect(); err != nil { return false, err } - defer done() + defer s.unprotect() mu := s.shards[getShard(addr)].mu mu.Lock() @@ -154,11 +154,10 @@ func (s *Store) Has(addr chunk.Address) (yes bool, err error) { // Put stores chunk data. func (s *Store) Put(ch chunk.Chunk) (err error) { - done, err := s.protect() - if err != nil { + if err := s.protect(); err != nil { return err } - defer done() + defer s.unprotect() addr := ch.Address() data := ch.Data() @@ -203,27 +202,21 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { // If offset is less then 0, no free offsets are available. 
func (s *Store) getOffset(shard uint8) (offset int64, reclaimed bool, err error) { if !s.shardHasFreeOffsets(shard) { - // shard does not have free offset return -1, false, nil } - offset = -1 // negative offset denotes no available free offset + offset = -1 if s.freeCache != nil { - // check if local cache has an offset offset = s.freeCache.get(shard) } if offset < 0 { - // free cache did not return a free offset, - // check the meta store for one offset, err = s.meta.FreeOffset(shard) if err != nil { return 0, false, err } } if offset < 0 { - // meta store did not return a free offset, - // mark this shard that has no free offsets s.markShardWithFreeOffsets(shard, false) return -1, false, nil } @@ -233,11 +226,10 @@ func (s *Store) getOffset(shard uint8) (offset int64, reclaimed bool, err error) // Delete removes chunk data. func (s *Store) Delete(addr chunk.Address) (err error) { - done, err := s.protect() - if err != nil { + if err := s.protect(); err != nil { return err } - defer done() + defer s.unprotect() shard := getShard(addr) s.markShardWithFreeOffsets(shard, true) @@ -263,11 +255,10 @@ func (s *Store) Count() (count int, err error) { // Iterate iterates over stored chunks in no particular order. func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) { - done, err := s.protect() - if err != nil { + if err := s.protect(); err != nil { return err } - defer done() + defer s.unprotect() for _, sh := range s.shards { sh.mu.Lock() @@ -289,7 +280,7 @@ func (s *Store) Iterate(fn func(chunk.Chunk) (stop bool, err error)) (err error) } // Close disables of further operations on the Store. -// Every call to its methods will return ErrDBClosed error. +// Every call to its methods will return ErrStoreClosed error. // Close will wait for all running operations to finish before // closing its MetaStore and returning. func (s *Store) Close() (err error) { @@ -297,6 +288,7 @@ func (s *Store) Close() (err error) { close(s.quit) }) + timeout := 15 * time.Second done := make(chan struct{}) go func() { s.wg.Wait() @@ -304,7 +296,8 @@ func (s *Store) Close() (err error) { }() select { case <-done: - case <-time.After(15 * time.Second): + case <-time.After(timeout): + log.Debug("timeout on waiting chunk store parallel operations to finish", "timeout", timeout) } for _, sh := range s.shards { @@ -318,17 +311,22 @@ func (s *Store) Close() (err error) { // protect protects Store from executing operations // after the Close method is called and makes sure // that Close method will wait for all ongoing operations -// to finish before returning. Returned function done +// to finish before returning. Method unprotect done // must be closed to unblock the Close method call. -func (s *Store) protect() (done func(), err error) { +func (s *Store) protect() (err error) { select { case <-s.quit: - // Store is closed. - return nil, ErrDBClosed + return ErrStoreClosed default: } s.wg.Add(1) - return s.wg.Done, nil + return nil +} + +// unprotect removes a protection set by the protect method +// allowing the Close method to unblock. +func (s *Store) unprotect() { + s.wg.Done() } // getMeta returns Meta information from MetaStore. 
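Aside (not part of the patch series): a minimal standalone version of the protect/unprotect/Close pattern that the patch above settles on, assuming only the quit channel, sync.Once and WaitGroup usage shown in fcds.go; the type and error names below are placeholders, not the real fcds API.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// errClosed stands in for ErrStoreClosed.
var errClosed = errors.New("closed store")

// store keeps just the fields needed for the pattern.
type store struct {
	wg       sync.WaitGroup
	quit     chan struct{}
	quitOnce sync.Once
}

// protect rejects calls made after close and registers the caller
// so close can wait for it to finish.
func (s *store) protect() error {
	select {
	case <-s.quit:
		return errClosed
	default:
	}
	s.wg.Add(1)
	return nil
}

// unprotect releases the registration taken by protect.
func (s *store) unprotect() { s.wg.Done() }

// close disables further operations and waits for ongoing ones.
func (s *store) close() {
	s.quitOnce.Do(func() { close(s.quit) })
	s.wg.Wait()
}

func main() {
	s := &store{quit: make(chan struct{})}
	if err := s.protect(); err == nil {
		// ... perform a store operation ...
		s.unprotect()
	}
	s.close()
	fmt.Println(s.protect()) // closed store
}
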
From 26f662683628d77eba584d7c84c626fa7ab91d56 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Wed, 18 Dec 2019 14:16:03 +0100 Subject: [PATCH 11/23] storage/fcds: add offsetCache ttl --- storage/fcds/fcds.go | 4 +-- storage/fcds/offsetcache.go | 60 +++++++++++++++++++++++++++----- storage/fcds/test/store.go | 3 +- storage/localstore/localstore.go | 2 +- 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 94227f8536..86c93f6826 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -70,10 +70,10 @@ type Option func(*Store) // WithCache is an optional argument to New constructor that enables // in memory cache of free chunk data positions in files -func WithCache(yes bool) Option { +func WithCache(yes bool, ttl time.Duration) Option { return func(s *Store) { if yes { - s.freeCache = newOffsetCache(shardCount) + s.freeCache = newOffsetCache(shardCount, ttl) } else { s.freeCache = nil } diff --git a/storage/fcds/offsetcache.go b/storage/fcds/offsetcache.go index 66311fdbc1..6a0ce0cfa9 100644 --- a/storage/fcds/offsetcache.go +++ b/storage/fcds/offsetcache.go @@ -16,24 +16,35 @@ package fcds -import "sync" +import ( + "sync" + "time" +) // offsetCache is a simple cache of offset integers // by shard files. type offsetCache struct { - m map[uint8]map[int64]struct{} - mu sync.RWMutex + m map[uint8]map[int64]time.Time + ttl time.Duration + mu sync.RWMutex + quit chan struct{} + quitOnce sync.Once } // newOffsetCache constructs offsetCache for a fixed number of shards. -func newOffsetCache(shardCount uint8) (c *offsetCache) { - m := make(map[uint8]map[int64]struct{}) +func newOffsetCache(shardCount uint8, ttl time.Duration) (c *offsetCache) { + m := make(map[uint8]map[int64]time.Time) for i := uint8(0); i < shardCount; i++ { - m[i] = make(map[int64]struct{}) + m[i] = make(map[int64]time.Time) } - return &offsetCache{ - m: m, + c = &offsetCache{ + m: m, + quit: make(chan struct{}), } + if ttl > 0 { + go c.cleanup(30 * time.Second) + } + return c } // get returns a free offset in a shard. If the returned @@ -52,7 +63,7 @@ func (c *offsetCache) get(shard uint8) (offset int64) { // set sets a free offset for a shard file. func (c *offsetCache) set(shard uint8, offset int64) { c.mu.Lock() - c.m[shard][offset] = struct{}{} + c.m[shard][offset] = time.Now().Add(c.ttl) c.mu.Unlock() } @@ -62,3 +73,34 @@ func (c *offsetCache) remove(shard uint8, offset int64) { delete(c.m[shard], offset) c.mu.Unlock() } + +// close stops parallel processing created +// by offsetCache. +func (c *offsetCache) close() { + c.quitOnce.Do(func() { + close(c.quit) + }) +} + +func (c *offsetCache) cleanup(period time.Duration) { + ticker := time.NewTicker(period) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + now := time.Now() + c.mu.Lock() + for _, s := range c.m { + for offset, expiration := range s { + if now.After(expiration) { + delete(s, offset) + } + } + } + c.mu.Unlock() + case <-c.quit: + return + } + } +} diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index f7dcfa9c69..9e7e68b537 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -25,6 +25,7 @@ import ( "os" "sync" "testing" + "time" "github.com/ethersphere/swarm/chunk" chunktesting "github.com/ethersphere/swarm/chunk/testing" @@ -297,7 +298,7 @@ func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds. 
t.Fatal(err) } - s, err = fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag)) + s, err = fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag, time.Hour)) if err != nil { os.RemoveAll(path) t.Fatal(err) diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 35631c8524..fadd42002c 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -230,7 +230,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { filepath.Join(path, "data"), chunk.DefaultSize+8, // chunk data has additional 8 bytes prepended metaStore, - fcds.WithCache(true), + fcds.WithCache(true, time.Hour), ) if err != nil { return nil, err From 5be3c2550d152b878caaa5f2546384584bb9e864 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Wed, 18 Dec 2019 16:39:48 +0100 Subject: [PATCH 12/23] Revert "storage/fcds: add offsetCache ttl" This reverts commit 26f662683628d77eba584d7c84c626fa7ab91d56. --- storage/fcds/fcds.go | 4 +-- storage/fcds/offsetcache.go | 60 +++++--------------------------- storage/fcds/test/store.go | 3 +- storage/localstore/localstore.go | 2 +- 4 files changed, 13 insertions(+), 56 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 86c93f6826..94227f8536 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -70,10 +70,10 @@ type Option func(*Store) // WithCache is an optional argument to New constructor that enables // in memory cache of free chunk data positions in files -func WithCache(yes bool, ttl time.Duration) Option { +func WithCache(yes bool) Option { return func(s *Store) { if yes { - s.freeCache = newOffsetCache(shardCount, ttl) + s.freeCache = newOffsetCache(shardCount) } else { s.freeCache = nil } diff --git a/storage/fcds/offsetcache.go b/storage/fcds/offsetcache.go index 6a0ce0cfa9..66311fdbc1 100644 --- a/storage/fcds/offsetcache.go +++ b/storage/fcds/offsetcache.go @@ -16,35 +16,24 @@ package fcds -import ( - "sync" - "time" -) +import "sync" // offsetCache is a simple cache of offset integers // by shard files. type offsetCache struct { - m map[uint8]map[int64]time.Time - ttl time.Duration - mu sync.RWMutex - quit chan struct{} - quitOnce sync.Once + m map[uint8]map[int64]struct{} + mu sync.RWMutex } // newOffsetCache constructs offsetCache for a fixed number of shards. -func newOffsetCache(shardCount uint8, ttl time.Duration) (c *offsetCache) { - m := make(map[uint8]map[int64]time.Time) +func newOffsetCache(shardCount uint8) (c *offsetCache) { + m := make(map[uint8]map[int64]struct{}) for i := uint8(0); i < shardCount; i++ { - m[i] = make(map[int64]time.Time) + m[i] = make(map[int64]struct{}) } - c = &offsetCache{ - m: m, - quit: make(chan struct{}), + return &offsetCache{ + m: m, } - if ttl > 0 { - go c.cleanup(30 * time.Second) - } - return c } // get returns a free offset in a shard. If the returned @@ -63,7 +52,7 @@ func (c *offsetCache) get(shard uint8) (offset int64) { // set sets a free offset for a shard file. func (c *offsetCache) set(shard uint8, offset int64) { c.mu.Lock() - c.m[shard][offset] = time.Now().Add(c.ttl) + c.m[shard][offset] = struct{}{} c.mu.Unlock() } @@ -73,34 +62,3 @@ func (c *offsetCache) remove(shard uint8, offset int64) { delete(c.m[shard], offset) c.mu.Unlock() } - -// close stops parallel processing created -// by offsetCache. 
-func (c *offsetCache) close() { - c.quitOnce.Do(func() { - close(c.quit) - }) -} - -func (c *offsetCache) cleanup(period time.Duration) { - ticker := time.NewTicker(period) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - now := time.Now() - c.mu.Lock() - for _, s := range c.m { - for offset, expiration := range s { - if now.After(expiration) { - delete(s, offset) - } - } - } - c.mu.Unlock() - case <-c.quit: - return - } - } -} diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index 9e7e68b537..f7dcfa9c69 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -25,7 +25,6 @@ import ( "os" "sync" "testing" - "time" "github.com/ethersphere/swarm/chunk" chunktesting "github.com/ethersphere/swarm/chunk/testing" @@ -298,7 +297,7 @@ func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds. t.Fatal(err) } - s, err = fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag, time.Hour)) + s, err = fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag)) if err != nil { os.RemoveAll(path) t.Fatal(err) diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index fadd42002c..35631c8524 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -230,7 +230,7 @@ func New(path string, baseKey []byte, o *Options) (db *DB, err error) { filepath.Join(path, "data"), chunk.DefaultSize+8, // chunk data has additional 8 bytes prepended metaStore, - fcds.WithCache(true, time.Hour), + fcds.WithCache(true), ) if err != nil { return nil, err From 661a7f5d5f8252a348201194ce7029166456a672 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Tue, 14 Jan 2020 17:32:36 +0100 Subject: [PATCH 13/23] storage/fcds: rename fcds.Interface to fcds.Storer --- storage/fcds/fcds.go | 6 +++--- storage/fcds/leveldb/leveldb_test.go | 2 +- storage/fcds/mem/mem_test.go | 2 +- storage/fcds/mock/mock.go | 2 +- storage/fcds/mock/mock_test.go | 2 +- storage/fcds/test/store.go | 6 +++--- storage/localstore/localstore.go | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 94227f8536..5a90a5d1e9 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -30,10 +30,10 @@ import ( "github.com/ethersphere/swarm/chunk" ) -// Interface specifies methods required for FCDS implementation. +// Storer specifies methods required for FCDS implementation. // It can be used where alternative implementations are needed to // switch at runtime. -type Interface interface { +type Storer interface { Get(addr chunk.Address) (ch chunk.Chunk, err error) Has(addr chunk.Address) (yes bool, err error) Put(ch chunk.Chunk) (err error) @@ -43,7 +43,7 @@ type Interface interface { Close() (err error) } -var _ Interface = new(Store) +var _ Storer = new(Store) // Number of files that store chunk data. const shardCount = 32 diff --git a/storage/fcds/leveldb/leveldb_test.go b/storage/fcds/leveldb/leveldb_test.go index 25aa66e637..c960c2df61 100644 --- a/storage/fcds/leveldb/leveldb_test.go +++ b/storage/fcds/leveldb/leveldb_test.go @@ -29,7 +29,7 @@ import ( // TestFCDS runs a standard series of tests on main Store implementation // with LevelDB meta store. 
func TestFCDS(t *testing.T) { - test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { + test.RunAll(t, func(t *testing.T) (fcds.Storer, func()) { path, err := ioutil.TempDir("", "swarm-fcds-") if err != nil { t.Fatal(err) diff --git a/storage/fcds/mem/mem_test.go b/storage/fcds/mem/mem_test.go index 56e372ac35..288ab47157 100644 --- a/storage/fcds/mem/mem_test.go +++ b/storage/fcds/mem/mem_test.go @@ -28,7 +28,7 @@ import ( // TestFCDS runs a standard series of tests on main Store implementation // with in-memory meta store. func TestFCDS(t *testing.T) { - test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { + test.RunAll(t, func(t *testing.T) (fcds.Storer, func()) { path, err := ioutil.TempDir("", "swarm-fcds-") if err != nil { t.Fatal(err) diff --git a/storage/fcds/mock/mock.go b/storage/fcds/mock/mock.go index 21aeb45f1b..b4b7503ce4 100644 --- a/storage/fcds/mock/mock.go +++ b/storage/fcds/mock/mock.go @@ -22,7 +22,7 @@ import ( "github.com/ethersphere/swarm/storage/mock" ) -var _ fcds.Interface = new(Store) +var _ fcds.Storer = new(Store) // Store implements FCDS Interface by using mock // store for persistence. diff --git a/storage/fcds/mock/mock_test.go b/storage/fcds/mock/mock_test.go index 1958d241f9..49029c608a 100644 --- a/storage/fcds/mock/mock_test.go +++ b/storage/fcds/mock/mock_test.go @@ -28,7 +28,7 @@ import ( // TestFCDS runs a standard series of tests on mock Store implementation. func TestFCDS(t *testing.T) { - test.RunAll(t, func(t *testing.T) (fcds.Interface, func()) { + test.RunAll(t, func(t *testing.T) (fcds.Storer, func()) { return mock.New( mem.NewGlobalStore().NewNodeStore( common.BytesToAddress(make([]byte, 20)), diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index f7dcfa9c69..4a6a3ff464 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -44,7 +44,7 @@ func Main(m *testing.M) { } // RunAll runs all available tests for a Store implementation. -func RunAll(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { +func RunAll(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Storer, func())) { t.Run("empty", func(t *testing.T) { RunStore(t, &RunStoreOptions{ @@ -106,7 +106,7 @@ func RunAll(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func( // RunStoreOptions define parameters for Store test function. type RunStoreOptions struct { - NewStoreFunc func(t *testing.T) (fcds.Interface, func()) + NewStoreFunc func(t *testing.T) (fcds.Storer, func()) ChunkCount int DeleteSplit int Cleaned bool @@ -245,7 +245,7 @@ func RunStore(t *testing.T, o *RunStoreOptions) { } // RunIterator validates behaviour of Iterate and Count methods on a Store. 
-func RunIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Interface, func())) { +func RunIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Storer, func())) { chunkCount := 1000 db, clean := newStoreFunc(t) diff --git a/storage/localstore/localstore.go b/storage/localstore/localstore.go index 35631c8524..ffe86860ed 100644 --- a/storage/localstore/localstore.go +++ b/storage/localstore/localstore.go @@ -68,7 +68,7 @@ type DB struct { schemaName shed.StringField // chunk data storage - data fcds.Interface + data fcds.Storer // bin index and timestamps index metaIndex shed.Index // legacy data index, used only in export for manual migration From f51c6d8b8eae7d03ba3128f0884bae76b9200242 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Tue, 14 Jan 2020 17:47:45 +0100 Subject: [PATCH 14/23] storage/fcds: improve some commenting --- storage/fcds/fcds.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 5a90a5d1e9..49e56164e5 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -177,8 +177,13 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { } if offset < 0 { + // no free offsets found, + // append the chunk data by + // seeking to the end of the file offset, err = sh.f.Seek(0, io.SeekEnd) } else { + // seek to the offset position + // to replace the chunk data at that position _, err = sh.f.Seek(offset, io.SeekStart) } if err != nil { @@ -224,7 +229,7 @@ func (s *Store) getOffset(shard uint8) (offset int64, reclaimed bool, err error) return offset, true, nil } -// Delete removes chunk data. +// Delete makes the chunk unavailable. func (s *Store) Delete(addr chunk.Address) (err error) { if err := s.protect(); err != nil { return err From 91fc21fc1fe4b89e77630262f5043564e47d38b6 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Tue, 14 Jan 2020 17:48:11 +0100 Subject: [PATCH 15/23] storage/localstore: improve comment in the Import method --- storage/localstore/export.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/localstore/export.go b/storage/localstore/export.go index c758e616e8..564086fd7b 100644 --- a/storage/localstore/export.go +++ b/storage/localstore/export.go @@ -190,8 +190,8 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { if strings.HasPrefix(hdr.Name, tagsFilenamePrefix) { // All chunks are put before tag files are iterated on // because of tagsFilenamePrefix starts with "t" - // which is ordered later then hex characters of chunk - // addresses. + // which is ordered later in the tar file then + // hex characters of chunk addresses. // // Wait for chunks to be stored before continuing. wg.Wait() From 60e3938362c524b45b5deac26bf39bf655e92471 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Tue, 14 Jan 2020 17:50:10 +0100 Subject: [PATCH 16/23] storage/localstore: improve migrateDiwali migration message --- storage/localstore/migration.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/localstore/migration.go b/storage/localstore/migration.go index 27455abf44..88c6c07d47 100644 --- a/storage/localstore/migration.go +++ b/storage/localstore/migration.go @@ -225,9 +225,9 @@ func migrateSanctuary(db *DB) error { func migrateDiwali(db *DB) error { return NewBreakingMigrationError(fmt.Sprintf(` -Swarm chunk storage layer is changed. +Swarm chunk storage layer has changed. -You can choose if you want to do a manual migration or to discard current data. 
+You can choose either to manually migrate the data in your local store to the new data store or to discard the data altogether. Preserving data requires additional storage roughly the size of the data directory and may take longer time depending on storage performance. From c542f32fb493ac41741149c0149f722800ab009d Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Tue, 14 Jan 2020 18:00:12 +0100 Subject: [PATCH 17/23] storage/fcds: ensure that chunk data is no longer the the max value --- storage/fcds/fcds.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 49e56164e5..c389d36f6b 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -162,6 +162,11 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { addr := ch.Address() data := ch.Data() + size := len(data) + if size > s.maxChunkSize { + return fmt.Errorf("chunk data size %v exceeds %v bytes", size, s.maxChunkSize) + } + section := make([]byte, s.maxChunkSize) copy(section, data) @@ -197,7 +202,7 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { s.freeCache.remove(shard, offset) } return s.meta.Set(addr, shard, reclaimed, &Meta{ - Size: uint16(len(data)), + Size: uint16(size), Offset: offset, }) } From 0fc5e3ada6ef8da705f66176edb850a511da3a15 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Tue, 14 Jan 2020 18:04:11 +0100 Subject: [PATCH 18/23] storage/localstore: terminate import goroutine in case of errors --- storage/localstore/export.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/storage/localstore/export.go b/storage/localstore/export.go index 564086fd7b..296cbc90a3 100644 --- a/storage/localstore/export.go +++ b/storage/localstore/export.go @@ -171,7 +171,9 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { } select { case errC <- err: + return case <-ctx.Done(): + return } } // get the export file format version @@ -180,7 +182,9 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { if err != nil { select { case errC <- err: + return case <-ctx.Done(): + return } } version = string(data) @@ -206,7 +210,9 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { if err := db.setPin(batch, addr, counter); err != nil { select { case errC <- err: + return case <-ctx.Done(): + return } } } @@ -214,14 +220,18 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { if err := scanner.Err(); err != nil { select { case errC <- err: + return case <-ctx.Done(): + return } } if err := db.shed.WriteBatch(batch); err != nil { select { case errC <- err: + return case <-ctx.Done(): + return } } continue @@ -242,7 +252,9 @@ func (db *DB) Import(r io.Reader, legacy bool) (count int64, err error) { if err != nil { select { case errC <- err: + return case <-ctx.Done(): + return } } key := chunk.Address(keybytes) From 842f7d801cadce01263a9d45fed7347c089a6b6d Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Thu, 5 Mar 2020 15:36:45 +0100 Subject: [PATCH 19/23] storage/localstore: do not put existing chunks --- storage/localstore/mode_put.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/storage/localstore/mode_put.go b/storage/localstore/mode_put.go index 815ac4f6bd..c4bdf7efa7 100644 --- a/storage/localstore/mode_put.go +++ b/storage/localstore/mode_put.go @@ -141,9 +141,11 @@ func (db *DB) put(mode chunk.ModePut, chs ...chunk.Chunk) (exist []bool, err err return nil, err } - for _, ch := range chs { - if err := db.data.Put(ch); err != 
nil { - return nil, err + for i, ch := range chs { + if !exist[i] { + if err := db.data.Put(ch); err != nil { + return nil, err + } } } From d9341b83dc8f00179f25a049b52381be4f0273e7 Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Thu, 5 Mar 2020 15:37:16 +0100 Subject: [PATCH 20/23] storage/fcds/test: correctly handle storage path --- storage/fcds/test/store.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/storage/fcds/test/store.go b/storage/fcds/test/store.go index 4a6a3ff464..0580c16820 100644 --- a/storage/fcds/test/store.go +++ b/storage/fcds/test/store.go @@ -20,7 +20,6 @@ import ( "bytes" "flag" "fmt" - "io/ioutil" "math/rand" "os" "sync" @@ -292,12 +291,7 @@ func RunIterator(t *testing.T, newStoreFunc func(t *testing.T) (fcds.Storer, fun func NewFCDSStore(t *testing.T, path string, metaStore fcds.MetaStore) (s *fcds.Store, clean func()) { t.Helper() - path, err := ioutil.TempDir("", "swarm-fcds") - if err != nil { - t.Fatal(err) - } - - s, err = fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag)) + s, err := fcds.New(path, chunk.DefaultSize, metaStore, fcds.WithCache(!*noCacheFlag)) if err != nil { os.RemoveAll(path) t.Fatal(err) From 0f13d3bcc1c2ef5df17a2ae214f7f3d7123b0acd Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Thu, 5 Mar 2020 15:37:46 +0100 Subject: [PATCH 21/23] strage/fcds: check if chunk exists before it is put --- storage/fcds/fcds.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index c389d36f6b..5e7d773d83 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -176,6 +176,15 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { sh.mu.Lock() defer sh.mu.Unlock() + _, err = s.getMeta(addr) + switch err { + case chunk.ErrChunkNotFound: + case nil: + return nil + default: + return err + } + offset, reclaimed, err := s.getOffset(shard) if err != nil { return err From 4b6f726450132b4399a77034d8a8a6cd1851bd2c Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Thu, 5 Mar 2020 15:53:00 +0100 Subject: [PATCH 22/23] storage/fcds: add and use MetaStore.Has --- storage/fcds/fcds.go | 20 ++++++-------------- storage/fcds/leveldb/leveldb.go | 11 +++++++++++ storage/fcds/mem/mem.go | 8 ++++++++ storage/fcds/meta.go | 1 + 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index 5e7d773d83..a585308f06 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -142,14 +142,7 @@ func (s *Store) Has(addr chunk.Address) (yes bool, err error) { mu.Lock() defer mu.Unlock() - _, err = s.getMeta(addr) - if err != nil { - if err == chunk.ErrChunkNotFound { - return false, nil - } - return false, err - } - return true, nil + return s.meta.Has(addr) } // Put stores chunk data. @@ -176,14 +169,13 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { sh.mu.Lock() defer sh.mu.Unlock() - _, err = s.getMeta(addr) - switch err { - case chunk.ErrChunkNotFound: - case nil: - return nil - default: + has, err := s.meta.Has(addr) + if err != nil { return err } + if has { + return nil + } offset, reclaimed, err := s.getOffset(shard) if err != nil { diff --git a/storage/fcds/leveldb/leveldb.go b/storage/fcds/leveldb/leveldb.go index 1c54a7eb0c..de5f92b7cc 100644 --- a/storage/fcds/leveldb/leveldb.go +++ b/storage/fcds/leveldb/leveldb.go @@ -60,6 +60,17 @@ func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { return m, nil } +// Has returns true if chunk has meta information stored. 
+func (s *MetaStore) Has(addr chunk.Address) (yes bool, err error) { + if _, err = s.db.Get(chunkKey(addr), nil); err != nil { + if err == leveldb.ErrNotFound { + return false, nil + } + return false, err + } + return true, nil +} + // Set adds a new chunk meta information for a shard. // Reclaimed flag denotes that the chunk is at the place of // already deleted chunk, not appended to the end of the file. diff --git a/storage/fcds/mem/mem.go b/storage/fcds/mem/mem.go index 4d4c5d1750..3f38c570f8 100644 --- a/storage/fcds/mem/mem.go +++ b/storage/fcds/mem/mem.go @@ -56,6 +56,14 @@ func (s *MetaStore) Get(addr chunk.Address) (m *fcds.Meta, err error) { return m, nil } +// Get returns true is meta information is stored. +func (s *MetaStore) Has(addr chunk.Address) (yes bool, err error) { + s.mu.RLock() + _, yes = s.meta[string(addr)] + s.mu.RUnlock() + return yes, nil +} + // Set adds a new chunk meta information for a shard. // Reclaimed flag denotes that the chunk is at the place of // already deleted chunk, not appended to the end of the file. diff --git a/storage/fcds/meta.go b/storage/fcds/meta.go index 68fb95e225..f0c9cc4e2b 100644 --- a/storage/fcds/meta.go +++ b/storage/fcds/meta.go @@ -27,6 +27,7 @@ import ( // chunk meta information in Store FCDS implementation. type MetaStore interface { Get(addr chunk.Address) (*Meta, error) + Has(addr chunk.Address) (bool, error) Set(addr chunk.Address, shard uint8, reclaimed bool, m *Meta) error Remove(addr chunk.Address, shard uint8) error Count() (int, error) From 39d328ac4671e53aaa91ed4cc456af8b233db23a Mon Sep 17 00:00:00 2001 From: Janos Guljas Date: Fri, 6 Mar 2020 13:29:44 +0100 Subject: [PATCH 23/23] storage/fcds: optimize locking --- storage/fcds/fcds.go | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/storage/fcds/fcds.go b/storage/fcds/fcds.go index a585308f06..2e7fe2e1c9 100644 --- a/storage/fcds/fcds.go +++ b/storage/fcds/fcds.go @@ -112,14 +112,15 @@ func (s *Store) Get(addr chunk.Address) (ch chunk.Chunk, err error) { } defer s.unprotect() - sh := s.shards[getShard(addr)] - sh.mu.Lock() - defer sh.mu.Unlock() - m, err := s.getMeta(addr) if err != nil { return nil, err } + + sh := s.shards[getShard(addr)] + sh.mu.Lock() + defer sh.mu.Unlock() + data := make([]byte, m.Size) n, err := sh.f.ReadAt(data, m.Offset) if err != nil && err != io.EOF { @@ -138,10 +139,6 @@ func (s *Store) Has(addr chunk.Address) (yes bool, err error) { } defer s.unprotect() - mu := s.shards[getShard(addr)].mu - mu.Lock() - defer mu.Unlock() - return s.meta.Has(addr) } @@ -166,9 +163,6 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { shard := getShard(addr) sh := s.shards[shard] - sh.mu.Lock() - defer sh.mu.Unlock() - has, err := s.meta.Has(addr) if err != nil { return err @@ -177,6 +171,9 @@ func (s *Store) Put(ch chunk.Chunk) (err error) { return nil } + sh.mu.Lock() + defer sh.mu.Unlock() + offset, reclaimed, err := s.getOffset(shard) if err != nil { return err @@ -245,10 +242,6 @@ func (s *Store) Delete(addr chunk.Address) (err error) { shard := getShard(addr) s.markShardWithFreeOffsets(shard, true) - mu := s.shards[shard].mu - mu.Lock() - defer mu.Unlock() - if s.freeCache != nil { m, err := s.getMeta(addr) if err != nil {