Skip to content

Commit

Permalink
Hold onto value log files while we have iterators
Browse files Browse the repository at this point in the history
Makes iterators hold a refcount on the value log to prevent it from
deleting value log files while an iterator is running. Files queued
for deletion are removed the next time the iterator count drops to
zero.

This prevents long-running iterators from inevitably failing with
ErrRetry.  Gets can still fail with ErrRetry and to fix that we would
have to change their interface.
  • Loading branch information
Sam Hughes committed Sep 14, 2017
1 parent 64df7f5 commit 9ed12b9
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 16 deletions.
3 changes: 3 additions & 0 deletions iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ func (it *Iterator) ValidForPrefix(prefix []byte) bool {
// Close would close the iterator. It is important to call this when you're done with iteration.
func (it *Iterator) Close() {
	it.iitr.Close()
	// Release the refcount taken in NewIterator so the value log can
	// delete files queued while this iterator was running.
	// TODO: We could handle this error.
	_ = it.kv.vlog.decrIteratorCount()
}

// Next would advance the iterator by one. Always check it.Valid() after a Next()
Expand Down Expand Up @@ -343,6 +345,7 @@ func (it *Iterator) Rewind() {
func (s *KV) NewIterator(opt IteratorOptions) *Iterator {
tables, decr := s.getMemTables()
defer decr()
s.vlog.incrIteratorCount()
var iters []y.Iterator
for i := 0; i < len(tables); i++ {
iters = append(iters, tables[i].NewUniIterator(opt.Reverse))
Expand Down
86 changes: 70 additions & 16 deletions value.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ func (vlog *valueLog) rewrite(f *logFile) error {
elog.Printf("Processed %d entries in total", count)

elog.Printf("Removing fid: %d", f.fid)
var deleteFileNow bool
// Entries written to LSM. Remove the older file now.
{
vlog.filesLock.Lock()
Expand All @@ -374,19 +375,62 @@ func (vlog *valueLog) rewrite(f *logFile) error {
vlog.filesLock.Unlock()
return errors.Errorf("Unable to find fid: %d", f.fid)
}
delete(vlog.filesMap, f.fid)
if vlog.numActiveIterators == 0 {
delete(vlog.filesMap, f.fid)
deleteFileNow = true
} else {
vlog.filesToBeDeleted = append(vlog.filesToBeDeleted, f.fid)
}
vlog.filesLock.Unlock()
}

// Exclusively lock the file so that there are no readers before closing/destroying it
f.lock.Lock()
rem := vlog.fpath(f.fid)
y.Munmap(f.fmap)
f.fd.Close() // close file previous to remove it
f.lock.Unlock()
if deleteFileNow {
vlog.deleteLogFile(f)
}

elog.Printf("Removing %s", rem)
return os.Remove(rem)
return nil
}

// incrIteratorCount records one more active iterator holding the value
// log open, which defers deletion of rewritten log files (see
// decrIteratorCount).
func (vlog *valueLog) incrIteratorCount() {
	vlog.filesLock.Lock()
	defer vlog.filesLock.Unlock()
	vlog.numActiveIterators++
}

// decrIteratorCount drops the active-iterator refcount. When it reaches
// zero, every file queued in filesToBeDeleted is removed from filesMap
// and deleted from disk. Returns the first deletion error encountered.
func (vlog *valueLog) decrIteratorCount() error {
	vlog.filesLock.Lock()
	vlog.numActiveIterators--
	if vlog.numActiveIterators != 0 {
		// Other iterators are still running; deletion stays deferred.
		vlog.filesLock.Unlock()
		return nil
	}

	// Last iterator is gone. Detach the pending files from our view
	// under the lock, but perform the (slow) disk deletion outside it.
	pending := make([]*logFile, 0, len(vlog.filesToBeDeleted))
	for _, fid := range vlog.filesToBeDeleted {
		pending = append(pending, vlog.filesMap[fid])
		delete(vlog.filesMap, fid)
	}
	vlog.filesToBeDeleted = nil
	vlog.filesLock.Unlock()

	for _, lf := range pending {
		if err := vlog.deleteLogFile(lf); err != nil {
			return err
		}
	}
	return nil
}

// deleteLogFile unmaps, closes, and removes a value log file from disk.
// A nil lf is treated as a no-op so callers may pass the result of a
// filesMap lookup directly without a presence check (decrIteratorCount
// does exactly that, and would otherwise panic on a missing fid).
func (vlog *valueLog) deleteLogFile(lf *logFile) error {
	if lf == nil {
		return nil
	}
	path := vlog.fpath(lf.fid)
	if err := y.Munmap(lf.fmap); err != nil {
		// Best-effort close; the munmap error is the one we report.
		_ = lf.fd.Close()
		return err
	}
	if err := lf.fd.Close(); err != nil {
		return err
	}
	return os.Remove(path)
}

// Entry provides Key, Value and if required, CASCounterCheck to kv.BatchSet() API.
Expand Down Expand Up @@ -515,9 +559,12 @@ type valueLog struct {
dirPath string
elog trace.EventLog

// guards our view of which files exist
filesLock sync.RWMutex
filesMap map[uint32]*logFile
// guards our view of which files exist, which to be deleted, how many active iterators
filesLock sync.RWMutex
filesMap map[uint32]*logFile
filesToBeDeleted []uint32
// A refcount of iterators -- when this hits zero, we can delete the filesToBeDeleted.
numActiveIterators int

kv *KV
maxFid uint32
Expand Down Expand Up @@ -651,11 +698,18 @@ func (vlog *valueLog) Close() error {
return err
}

// sortedFids returns the file id's, sorted. Assumes we have shared access to filesMap.
// sortedFids returns the file id's not pending deletion, sorted. Assumes we have shared access to
// filesMap.
func (vlog *valueLog) sortedFids() []uint32 {
toBeDeleted := make(map[uint32]struct{})
for _, fid := range vlog.filesToBeDeleted {
toBeDeleted[fid] = struct{}{}
}
ret := make([]uint32, 0, len(vlog.filesMap))
for fid := range vlog.filesMap {
ret = append(ret, fid)
if _, ok := toBeDeleted[fid]; !ok {
ret = append(ret, fid)
}
}
sort.Slice(ret, func(i, j int) bool {
return ret[i] < ret[j]
Expand Down Expand Up @@ -892,10 +946,10 @@ func (vlog *valueLog) runGCInLoop(lc *y.Closer) {
func (vlog *valueLog) pickLog() *logFile {
vlog.filesLock.RLock()
defer vlog.filesLock.RUnlock()
if len(vlog.filesMap) <= 1 {
fids := vlog.sortedFids()
if len(fids) <= 1 {
return nil
}
fids := vlog.sortedFids()
// This file shouldn't be being written to.
idx := rand.Intn(len(fids))
if idx > 0 {
Expand Down
80 changes: 80 additions & 0 deletions value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"io/ioutil"
"math/rand"
"os"
"sync"
"testing"

"github.com/dgraph-io/badger/y"
Expand Down Expand Up @@ -228,6 +229,85 @@ func TestValueGC2(t *testing.T) {
}
}

// TestValueGC3 verifies that an open iterator keeps rewritten value log
// files alive: a value log GC (rewrite) must not invalidate values the
// iterator has yet to read.
func TestValueGC3(t *testing.T) {
	dir, err := ioutil.TempDir("", "badger")
	require.NoError(t, err)
	defer os.RemoveAll(dir)
	opt := getTestOptions(dir)
	opt.ValueLogFileSize = 1 << 20
	opt.ValueGCThreshold = 0.0 // Disable automatic GC; we trigger rewrite manually.

	kv, err := NewKV(opt)
	require.NoError(t, err)
	defer kv.Close()

	// We want to test whether an iterator can continue through a value log GC.

	valueSize := 32 << 10

	var value3 []byte
	var entries []*Entry
	for i := 0; i < 100; i++ {
		v := make([]byte, valueSize) // 32K * 100 will take >=3'276'800 B.
		if i == 3 {
			// value3 aliases v, so it holds the random bytes written below.
			value3 = v
		}
		rand.Read(v[:])
		// Keys key000, key001, key002, such that sorted order matches insertion order
		entry := &Entry{
			Key:   []byte(fmt.Sprintf("key%03d", i)),
			Value: v,
		}
		entries = append(entries, entry)
	}
	err = kv.BatchSet(entries)
	require.NoError(t, err)
	for _, e := range entries {
		require.NoError(t, e.Error)
	}

	// Start an iterator to keys in the first value log file
	itOpt := IteratorOptions{
		PrefetchValues: false,
		PrefetchSize:   0,
		Reverse:        false,
	}

	it := kv.NewIterator(itOpt)
	defer it.Close()
	// Walk a few keys
	it.Rewind()
	require.True(t, it.Valid())
	item := it.Item()
	require.Equal(t, []byte("key000"), item.Key())
	it.Next()
	require.True(t, it.Valid())
	item = it.Item()
	require.Equal(t, []byte("key001"), item.Key())
	it.Next()
	require.True(t, it.Valid())
	item = it.Item()
	require.Equal(t, []byte("key002"), item.Key())

	// Like other tests, we pull out a logFile to rewrite it directly

	kv.vlog.filesLock.RLock()
	logFile := kv.vlog.filesMap[kv.vlog.sortedFids()[0]]
	kv.vlog.filesLock.RUnlock()

	// Fail fast if the rewrite itself errors; otherwise the assertions
	// below would fail for a misleading reason.
	require.NoError(t, kv.vlog.rewrite(logFile))

	it.Next()
	require.True(t, it.Valid())
	item = it.Item()
	require.Equal(t, []byte("key003"), item.Key())
	var v3 []byte
	var wg sync.WaitGroup
	wg.Add(1)
	item.Value(func(x []byte) error { v3 = x; wg.Done(); return nil })
	wg.Wait()
	require.Equal(t, value3, v3)
}

var (
k1 = []byte("k1")
k2 = []byte("k2")
Expand Down

0 comments on commit 9ed12b9

Please sign in to comment.