Skip to content

Commit

Permalink
Opt(pickTables): Fix an optimization regression
Browse files Browse the repository at this point in the history
NewKeyIterator uses pickTables, which was optimized in the past. However, a
recent PR (dgraph-io#1546) removed this optimization, which made
NewKeyIterator quite expensive.

This PR brings that optimization back.
  • Loading branch information
manishrjain committed Dec 3, 2020
1 parent 70088c6 commit 74f2e02
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
19 changes: 16 additions & 3 deletions iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ func (opt *IteratorOptions) pickTable(t table.TableInterface) bool {
// Bloom filter lookup would only work if opt.Prefix does NOT have the read
// timestamp as part of the key.
if opt.prefixIsKey && t.DoesNotHave(y.Hash(opt.Prefix)) {
y.NumLSMBloomHits.Add("pickTable", 1)
return false
}
return true
Expand All @@ -369,6 +368,8 @@ func (opt *IteratorOptions) pickTables(all []*table.Table) []*table.Table {
return out
}
sIdx := sort.Search(len(all), func(i int) bool {
// table.Biggest >= opt.prefix
// if opt.Prefix < table.Biggest, then surely it is not in any of the preceding tables.
return opt.compareToPrefix(all[i].Biggest()) >= 0
})
if sIdx == len(all) {
Expand All @@ -386,11 +387,23 @@ func (opt *IteratorOptions) pickTables(all []*table.Table) []*table.Table {
return out
}

// opt.prefixIsKey == true. This code is optimizing for opt.prefixIsKey part.
var out []*table.Table
hash := y.Hash(opt.Prefix)
for _, t := range filtered {
if opt.pickTable(t) {
out = append(out, t)
// When we encounter the first table whose smallest key is higher than opt.Prefix, we can
// stop. This is an IMPORTANT optimization, just considering how often we call
// NewKeyIterator.
if opt.compareToPrefix(t.Smallest()) > 0 {
// if table.Smallest > opt.Prefix, then this and all tables after this can be ignored.
break
}
// opt.Prefix is actually the key. So, we can run bloom filter checks
// as well.
if t.DoesNotHave(hash) {
continue
}
out = append(out, t)
}
return out
}
Expand Down
7 changes: 6 additions & 1 deletion table/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,9 +630,14 @@ func (t *Table) DoesNotHave(hash uint32) bool {
return false
}

y.NumLSMBloomHits.Add("DoesNotHave_ALL", 1)
index := t.fetchIndex()
bf := index.BloomFilterBytes()
return !y.Filter(bf).MayContain(hash)
mayContain := y.Filter(bf).MayContain(hash)
if !mayContain {
y.NumLSMBloomHits.Add("DoesNotHave_HIT", 1)
}
return !mayContain
}

// readTableIndex reads table index from the sst and returns its pb format.
Expand Down

0 comments on commit 74f2e02

Please sign in to comment.