123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528 |
- // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
- // All rights reserved.
- //
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
- package leveldb
- import (
- "fmt"
- "sync/atomic"
- "unsafe"
- "github.com/syndtr/goleveldb/leveldb/iterator"
- "github.com/syndtr/goleveldb/leveldb/opt"
- "github.com/syndtr/goleveldb/leveldb/util"
- )
- type tSet struct {
- level int
- table *tFile
- }
- type version struct {
- s *session
- levels []tFiles
- // Level that should be compacted next and its compaction score.
- // Score < 1 means compaction is not strictly needed. These fields
- // are initialized by computeCompaction()
- cLevel int
- cScore float64
- cSeek unsafe.Pointer
- closing bool
- ref int
- released bool
- }
- func newVersion(s *session) *version {
- return &version{s: s}
- }
- func (v *version) incref() {
- if v.released {
- panic("already released")
- }
- v.ref++
- if v.ref == 1 {
- // Incr file ref.
- for _, tt := range v.levels {
- for _, t := range tt {
- v.s.addFileRef(t.fd, 1)
- }
- }
- }
- }
- func (v *version) releaseNB() {
- v.ref--
- if v.ref > 0 {
- return
- } else if v.ref < 0 {
- panic("negative version ref")
- }
- for _, tt := range v.levels {
- for _, t := range tt {
- if v.s.addFileRef(t.fd, -1) == 0 {
- v.s.tops.remove(t)
- }
- }
- }
- v.released = true
- }
- func (v *version) release() {
- v.s.vmu.Lock()
- v.releaseNB()
- v.s.vmu.Unlock()
- }
- func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
- ukey := ikey.ukey()
- // Aux level.
- if aux != nil {
- for _, t := range aux {
- if t.overlaps(v.s.icmp, ukey, ukey) {
- if !f(-1, t) {
- return
- }
- }
- }
- if lf != nil && !lf(-1) {
- return
- }
- }
- // Walk tables level-by-level.
- for level, tables := range v.levels {
- if len(tables) == 0 {
- continue
- }
- if level == 0 {
- // Level-0 files may overlap each other. Find all files that
- // overlap ukey.
- for _, t := range tables {
- if t.overlaps(v.s.icmp, ukey, ukey) {
- if !f(level, t) {
- return
- }
- }
- }
- } else {
- if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
- t := tables[i]
- if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
- if !f(level, t) {
- return
- }
- }
- }
- }
- if lf != nil && !lf(level) {
- return
- }
- }
- }
- func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
- if v.closing {
- return nil, false, ErrClosed
- }
- ukey := ikey.ukey()
- var (
- tset *tSet
- tseek bool
- // Level-0.
- zfound bool
- zseq uint64
- zkt keyType
- zval []byte
- )
- err = ErrNotFound
- // Since entries never hop across level, finding key/value
- // in smaller level make later levels irrelevant.
- v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
- if level >= 0 && !tseek {
- if tset == nil {
- tset = &tSet{level, t}
- } else {
- tseek = true
- }
- }
- var (
- fikey, fval []byte
- ferr error
- )
- if noValue {
- fikey, ferr = v.s.tops.findKey(t, ikey, ro)
- } else {
- fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
- }
- switch ferr {
- case nil:
- case ErrNotFound:
- return true
- default:
- err = ferr
- return false
- }
- if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
- if v.s.icmp.uCompare(ukey, fukey) == 0 {
- // Level <= 0 may overlaps each-other.
- if level <= 0 {
- if fseq >= zseq {
- zfound = true
- zseq = fseq
- zkt = fkt
- zval = fval
- }
- } else {
- switch fkt {
- case keyTypeVal:
- value = fval
- err = nil
- case keyTypeDel:
- default:
- panic("leveldb: invalid internalKey type")
- }
- return false
- }
- }
- } else {
- err = fkerr
- return false
- }
- return true
- }, func(level int) bool {
- if zfound {
- switch zkt {
- case keyTypeVal:
- value = zval
- err = nil
- case keyTypeDel:
- default:
- panic("leveldb: invalid internalKey type")
- }
- return false
- }
- return true
- })
- if tseek && tset.table.consumeSeek() <= 0 {
- tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
- }
- return
- }
- func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
- var tset *tSet
- v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
- if tset == nil {
- tset = &tSet{level, t}
- return true
- }
- if tset.table.consumeSeek() <= 0 {
- tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
- }
- return false
- }, nil)
- return
- }
- func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
- strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
- for level, tables := range v.levels {
- if level == 0 {
- // Merge all level zero files together since they may overlap.
- for _, t := range tables {
- its = append(its, v.s.tops.newIterator(t, slice, ro))
- }
- } else if len(tables) != 0 {
- its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
- }
- }
- return
- }
- func (v *version) newStaging() *versionStaging {
- return &versionStaging{base: v}
- }
- // Spawn a new version based on this version.
- func (v *version) spawn(r *sessionRecord) *version {
- staging := v.newStaging()
- staging.commit(r)
- return staging.finish()
- }
- func (v *version) fillRecord(r *sessionRecord) {
- for level, tables := range v.levels {
- for _, t := range tables {
- r.addTableFile(level, t)
- }
- }
- }
- func (v *version) tLen(level int) int {
- if level < len(v.levels) {
- return len(v.levels[level])
- }
- return 0
- }
- func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
- for level, tables := range v.levels {
- for _, t := range tables {
- if v.s.icmp.Compare(t.imax, ikey) <= 0 {
- // Entire file is before "ikey", so just add the file size
- n += t.size
- } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
- // Entire file is after "ikey", so ignore
- if level > 0 {
- // Files other than level 0 are sorted by meta->min, so
- // no further files in this level will contain data for
- // "ikey".
- break
- }
- } else {
- // "ikey" falls in the range for this table. Add the
- // approximate offset of "ikey" within the table.
- if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
- n += m
- } else {
- return 0, err
- }
- }
- }
- }
- return
- }
- func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
- if maxLevel > 0 {
- if len(v.levels) == 0 {
- return maxLevel
- }
- if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
- var overlaps tFiles
- for ; level < maxLevel; level++ {
- if pLevel := level + 1; pLevel >= len(v.levels) {
- return maxLevel
- } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
- break
- }
- if gpLevel := level + 2; gpLevel < len(v.levels) {
- overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
- if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
- break
- }
- }
- }
- }
- }
- return
- }
- func (v *version) computeCompaction() {
- // Precomputed best level for next compaction
- bestLevel := int(-1)
- bestScore := float64(-1)
- statFiles := make([]int, len(v.levels))
- statSizes := make([]string, len(v.levels))
- statScore := make([]string, len(v.levels))
- statTotSize := int64(0)
- for level, tables := range v.levels {
- var score float64
- size := tables.size()
- if level == 0 {
- // We treat level-0 specially by bounding the number of files
- // instead of number of bytes for two reasons:
- //
- // (1) With larger write-buffer sizes, it is nice not to do too
- // many level-0 compaction.
- //
- // (2) The files in level-0 are merged on every read and
- // therefore we wish to avoid too many files when the individual
- // file size is small (perhaps because of a small write-buffer
- // setting, or very high compression ratios, or lots of
- // overwrites/deletions).
- score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
- } else {
- score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
- }
- if score > bestScore {
- bestLevel = level
- bestScore = score
- }
- statFiles[level] = len(tables)
- statSizes[level] = shortenb(int(size))
- statScore[level] = fmt.Sprintf("%.2f", score)
- statTotSize += size
- }
- v.cLevel = bestLevel
- v.cScore = bestScore
- v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
- }
- func (v *version) needCompaction() bool {
- return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
- }
- type tablesScratch struct {
- added map[int64]atRecord
- deleted map[int64]struct{}
- }
- type versionStaging struct {
- base *version
- levels []tablesScratch
- }
- func (p *versionStaging) getScratch(level int) *tablesScratch {
- if level >= len(p.levels) {
- newLevels := make([]tablesScratch, level+1)
- copy(newLevels, p.levels)
- p.levels = newLevels
- }
- return &(p.levels[level])
- }
- func (p *versionStaging) commit(r *sessionRecord) {
- // Deleted tables.
- for _, r := range r.deletedTables {
- scratch := p.getScratch(r.level)
- if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
- if scratch.deleted == nil {
- scratch.deleted = make(map[int64]struct{})
- }
- scratch.deleted[r.num] = struct{}{}
- }
- if scratch.added != nil {
- delete(scratch.added, r.num)
- }
- }
- // New tables.
- for _, r := range r.addedTables {
- scratch := p.getScratch(r.level)
- if scratch.added == nil {
- scratch.added = make(map[int64]atRecord)
- }
- scratch.added[r.num] = r
- if scratch.deleted != nil {
- delete(scratch.deleted, r.num)
- }
- }
- }
- func (p *versionStaging) finish() *version {
- // Build new version.
- nv := newVersion(p.base.s)
- numLevel := len(p.levels)
- if len(p.base.levels) > numLevel {
- numLevel = len(p.base.levels)
- }
- nv.levels = make([]tFiles, numLevel)
- for level := 0; level < numLevel; level++ {
- var baseTabels tFiles
- if level < len(p.base.levels) {
- baseTabels = p.base.levels[level]
- }
- if level < len(p.levels) {
- scratch := p.levels[level]
- var nt tFiles
- // Prealloc list if possible.
- if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
- nt = make(tFiles, 0, n)
- }
- // Base tables.
- for _, t := range baseTabels {
- if _, ok := scratch.deleted[t.fd.Num]; ok {
- continue
- }
- if _, ok := scratch.added[t.fd.Num]; ok {
- continue
- }
- nt = append(nt, t)
- }
- // New tables.
- for _, r := range scratch.added {
- nt = append(nt, tableFileFromRecord(r))
- }
- if len(nt) != 0 {
- // Sort tables.
- if level == 0 {
- nt.sortByNum()
- } else {
- nt.sortByKey(p.base.s.icmp)
- }
- nv.levels[level] = nt
- }
- } else {
- nv.levels[level] = baseTabels
- }
- }
- // Trim levels.
- n := len(nv.levels)
- for ; n > 0 && nv.levels[n-1] == nil; n-- {
- }
- nv.levels = nv.levels[:n]
- // Compute compaction score for new version.
- nv.computeCompaction()
- return nv
- }
- type versionReleaser struct {
- v *version
- once bool
- }
- func (vr *versionReleaser) Release() {
- v := vr.v
- v.s.vmu.Lock()
- if !vr.once {
- v.releaseNB()
- vr.once = true
- }
- v.s.vmu.Unlock()
- }
|