Skip to content

Commit

Permalink
making vlog threshold dynamic
Browse files Browse the repository at this point in the history
  • Loading branch information
aman-bansal committed Jan 5, 2021
1 parent 3adc574 commit 6ce3b7c
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 12 deletions.
6 changes: 1 addition & 5 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -696,10 +696,6 @@ var requestPool = sync.Pool{
},
}

// skipVlog reports whether the value carried by e is strictly shorter than
// opt.ValueThreshold. Callers in this file treat a true result as "keep the
// value inline rather than writing it to the value log".
func (opt Options) skipVlog(e *Entry) bool {
	size := len(e.Value)
	return opt.ValueThreshold > size
}

func (db *DB) writeToLSM(b *request) error {
// We should check the length of b.Prts and b.Entries only when badger is not
// running in InMemory mode. In InMemory mode, we don't write anything to the
Expand All @@ -710,7 +706,7 @@ func (db *DB) writeToLSM(b *request) error {

for i, entry := range b.Entries {
var err error
if db.opt.skipVlog(entry) {
if db.vlog.skipVlog(entry) {
// Will include deletion / tombstone case.
err = db.mt.Put(entry.Key,
y.ValueStruct{
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module github.com/dgraph-io/badger/v2

go 1.12

// replace github.com/dgraph-io/ristretto => /home/mrjn/go/src/github.com/dgraph-io/ristretto
replace github.com/dgraph-io/ristretto => /home/amanbansal/go/src/github.com/dgraph-io/ristretto

require (
github.com/DataDog/zstd v1.4.1
Expand Down
18 changes: 15 additions & 3 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,12 @@ type Options struct {
TableSizeMultiplier int
MaxLevels int

ValueThreshold int
NumMemtables int
DynamicValueThreshold bool
ValueMinBound float64
ValueMaxBound float64
ValueBoundStep float64
ValueThreshold int
NumMemtables int
// Changing BlockSize across DB runs will not break badger. The block size is
// read from the block index stored at the end of the table.
BlockSize int
Expand Down Expand Up @@ -154,7 +158,15 @@ func DefaultOptions(path string) Options {
ValueLogFileSize: 1<<30 - 1,

ValueLogMaxEntries: 1000000,
ValueThreshold: 1 << 10, // 1 KB.

// TODO: change these to valid values.
DynamicValueThreshold: true,
ValueThreshold: 32, // 1 KB.
ValueBoundStep: 4, // 1 KB
ValueMinBound: 32, // 1 KB
ValueMaxBound: 512, // 1 MB


Logger: defaultLogger(INFO),
EncryptionKey: []byte{},
EncryptionKeyRotationDuration: 10 * 24 * time.Hour, // Default 10 days.
Expand Down
2 changes: 1 addition & 1 deletion stream_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ func (w *sortedWriter) handleRequests() {
for i, e := range req.Entries {
// If badger is running in InMemory mode, len(req.Ptrs) == 0.
var vs y.ValueStruct
if w.db.opt.skipVlog(e) {
if w.db.vlog.skipVlog(e) {
vs = y.ValueStruct{
Value: e.Value,
Meta: e.meta,
Expand Down
74 changes: 72 additions & 2 deletions value.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,25 @@ func (vlog *valueLog) dropAll() (int, error) {
return count, nil
}

// vLogThreshold bundles a value-size threshold with the read/write lock that
// its update and threshold methods take when writing or reading it.
type vLogThreshold struct {
// Embedded lock; update takes it for writing and threshold for reading.
sync.RWMutex
// vlMetrics is histogram data. Neither update nor threshold touches it.
// NOTE(review): presumably meant to hold the value-size samples used to derive
// valueThreshold — confirm against the code that populates it.
vlMetrics *z.HistogramData
// valueThreshold is the current threshold; access it via update/threshold.
valueThreshold int
}

// update stores val as the current threshold while holding the write lock.
func (v *vLogThreshold) update(val int) {
	v.Lock()
	v.valueThreshold = val
	v.Unlock()
}

// threshold returns the current threshold, read under the read lock.
func (v *vLogThreshold) threshold() int {
	v.RLock()
	current := v.valueThreshold
	v.RUnlock()
	return current
}

type valueLog struct {
dirPath string

Expand All @@ -455,13 +474,23 @@ type valueLog struct {
opt Options

garbageCh chan struct{}
valueCh chan int64
vCloser *z.Closer
discardStats *discardStats

// Metrics contains a running log of statistics like amount of data stored etc.
vlMetrics *z.HistogramData
valueThreshold int
}

// vlogFilePath returns the path of the value log file with id fid inside
// dirPath: the directory, the OS path separator, then the id zero-padded to
// at least six digits with a ".vlog" extension.
func vlogFilePath(dirPath string, fid uint32) string {
	fileName := fmt.Sprintf("%06d.vlog", fid)
	return dirPath + string(os.PathSeparator) + fileName
}

// skipVlog reports whether the value carried by e is strictly shorter than the
// value log's current valueThreshold.
//
// NOTE(review): valueThreshold is read here without any lock, while the
// goroutine started in init assigns to it; confirm whether this read needs
// synchronization.
func (vlog *valueLog) skipVlog(e *Entry) bool {
	size := len(e.Value)
	return size < vlog.valueThreshold
}

// fpath returns the path of the value log file with id fid, resolved against
// this value log's directory via vlogFilePath.
func (vlog *valueLog) fpath(fid uint32) string {
	dir := vlog.dirPath
	return vlogFilePath(dir, fid)
}
Expand Down Expand Up @@ -546,6 +575,45 @@ func (vlog *valueLog) init(db *DB) {
return
}
vlog.dirPath = vlog.opt.ValueDir
vlog.valueThreshold = db.opt.ValueThreshold

if db.opt.DynamicValueThreshold {
vlog.valueCh = make(chan int64, 1000)
// Build the histogram bucket bounds: they start at ValueMinBound and advance
// by ValueBoundStep per bucket, stopping below ValueMaxBound. The defaults for
// these options are set in DefaultOptions.
size := int(math.Ceil((db.opt.ValueMaxBound - db.opt.ValueMinBound) / db.opt.
ValueBoundStep))
bounds := make([]float64, size)
for i := range bounds {
if i == 0 {
bounds[0] = db.opt.ValueMinBound
continue
}
bounds[i] = bounds[i-1] + db.opt.ValueBoundStep
}
vlog.vlMetrics = z.NewHistogramData(bounds)
vlog.vCloser = z.NewCloser(1)
go func() {
defer vlog.vCloser.Done()
for {
select {
case v := <-vlog.valueCh:
vlog.vlMetrics.Update(v)
// Use the 99th percentile of the recorded value sizes as the threshold, so
// that only roughly the largest 1% of values are written to the value log.
p := int(vlog.vlMetrics.Percentile(0.99))
if vlog.valueThreshold != p {
vlog.opt.Infof("updating value threshold from: %d to: %d",
vlog.valueThreshold, p)
vlog.valueThreshold = p
}
case <-vlog.vCloser.HasBeenClosed():
return
}
}
}()
}

vlog.garbageCh = make(chan struct{}, 1) // Only allow one GC at a time.
lf, err := initDiscardStats(vlog.opt)
Expand Down Expand Up @@ -624,7 +692,7 @@ func (vlog *valueLog) Close() error {
}

vlog.opt.Debugf("Stopping garbage collection of values.")

vlog.vCloser.SignalAndWait()
var err error
for id, lf := range vlog.filesMap {
lf.lock.Lock() // We won’t release the lock.
Expand Down Expand Up @@ -846,7 +914,7 @@ func (vlog *valueLog) write(reqs []*request) error {
buf.Reset()

e := b.Entries[j]
if vlog.db.opt.skipVlog(e) {
if vlog.skipVlog(e) {
b.Ptrs = append(b.Ptrs, valuePointer{})
continue
}
Expand Down Expand Up @@ -877,6 +945,8 @@ func (vlog *valueLog) write(reqs []*request) error {
written++
bytesWritten += buf.Len()

vlog.vlMetrics.Update(int64(len(e.Value)))
vlog.valueThreshold = int(math.Round(vlog.vlMetrics.Percentile(1)))
// No need to flush anything, we write to file directly via mmap.
}
y.NumWrites.Add(int64(written))
Expand Down
34 changes: 34 additions & 0 deletions value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,40 @@ import (
"github.com/stretchr/testify/require"
)

// TestDynamicValueThreshold opens a DB with an initial value threshold of 32
// and writes entries straight to the value log with value sizes growing from
// 16 to 1024 bytes in steps of 4, 1000 entries per size.
//
// It fails immediately if the DB cannot be opened or if any write returns an
// error; previously both errors were discarded, so a failed Open surfaced as
// a nil-pointer panic and failed writes went unnoticed.
//
// NOTE(review): the test does not yet assert anything about the resulting
// threshold; add an expectation once the intended value is settled.
func TestDynamicValueThreshold(t *testing.T) {
	dir, err := ioutil.TempDir("", "badger-test")
	y.Check(err)
	defer removeDir(dir)

	// randValue returns n random ASCII letters.
	randValue := func(n int) []byte {
		letters := []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
		b := make([]byte, n)
		for i := range b {
			b[i] = letters[rand.Intn(len(letters))]
		}
		return b
	}

	kv, err := Open(getTestOptions(dir).WithValueThreshold(32))
	require.NoError(t, err)
	defer kv.Close()

	vlog := &kv.vlog
	for vl := 16; vl <= 1024; vl += 4 {
		for i := 0; i < 1000; i++ {
			e := &Entry{
				Key:   []byte(fmt.Sprintf("samplekey_%d_%d", vl, i)),
				Value: randValue(vl),
				meta:  bitValuePointer,
			}

			req := new(request)
			req.Entries = []*Entry{e}
			require.NoError(t, vlog.write([]*request{req}))
		}
	}
}

func TestValueBasic(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
y.Check(err)
Expand Down

0 comments on commit 6ce3b7c

Please sign in to comment.