package main

import (
	"fmt"
	"log"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"
)

// TODO testable

// partitions represents a list of partitions for a single version and their
// mapping to nodes, synced from zookeeper. It's also responsible for
// advertising the partitions we have locally.
type partitions struct {
	peers     *peers
	zkWatcher *zkWatcher

	db      string
	version string
	zkPath  string

	numPartitions int
	replication   int

	missing int
	local   map[int]bool
	remote  map[int][]string
	lock    sync.RWMutex

	noneMissing chan bool
}
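
// watchPartitions creates a partitions map for the given db and version,
// seeds the remote assignments from the initial zookeeper child list, and
// then keeps them in sync in a background goroutine.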
func watchPartitions(zkWatcher *zkWatcher, peers *peers, db, version string, numPartitions, replication int) *partitions {
	p := &partitions{
		peers:         peers,
		zkWatcher:     zkWatcher,
		db:            db,
		version:       version,
		zkPath:        path.Join("partitions", db, version),
		numPartitions: numPartitions,
		replication:   replication,
		local:         make(map[int]bool),
		remote:        make(map[int][]string),
		noneMissing:   make(chan bool),
	}

	updates, _ := zkWatcher.watchChildren(p.zkPath)
	p.updateRemotePartitions(<-updates)
	go p.sync(updates)

	return p
}

// pickLocalPartitions selects which partitions are local by iterating through
// them all and checking the hashring to see if this peer is one of the
// replicas.
func (p *partitions) pickLocalPartitions() map[int]bool {
	partitions := make(map[int]bool)

	for i := 0; i < p.numPartitions; i++ {
		partitionId := p.partitionId(i)

		replicas := p.peers.pick(partitionId, p.replication)
		for _, replica := range replicas {
			if replica == peerSelf {
				partitions[i] = true
			}
		}
	}

	return partitions
}

// sync syncs the remote partitions from zookeeper whenever they change, and
// closes noneMissing once the updates channel is closed.
func (p *partitions) sync(updates chan []string) {
	for {
		nodes, ok := <-updates
		if !ok {
			close(p.noneMissing)
			break
		}

		p.updateRemotePartitions(nodes)
	}
}
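
// updateLocalPartitions replaces the set of partitions this peer has locally
// and recalculates how many partitions are still missing cluster-wide.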
func (p *partitions) updateLocalPartitions(local map[int]bool) {
	p.lock.Lock()
	defer p.lock.Unlock()

	p.local = local
	p.updateMissing()
}
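
// updateRemotePartitions rebuilds the remote partition map from the zookeeper
// child nodes, each named "<partition>@<host>" (see partitionZKNode), skipping
// entries advertised by this peer itself.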
func (p *partitions) updateRemotePartitions(nodes []string) {
	p.lock.Lock()
	defer p.lock.Unlock()

	remote := make(map[int][]string)
	for _, node := range nodes {
		parts := strings.SplitN(node, "@", 2)
		if len(parts) != 2 {
			// Ignore nodes that don't match the expected "<partition>@<host>" form.
			continue
		}

		partition, err := strconv.Atoi(parts[0])
		if err != nil {
			continue
		}

		host := parts[1]
		if host != p.peers.address {
			remote[partition] = append(remote[partition], host)
		}
	}

	p.remote = remote
	p.updateMissing()
}
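
// updateMissing recounts the partitions that are available neither locally nor
// on any remote peer, and signals noneMissing once everything is covered. The
// caller must hold p.lock.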
func (p *partitions) updateMissing() {
	// Check each partition. If every one is available on at least one node,
	// then we're ready to rumble.
	missing := 0
	for i := 0; i < p.numPartitions; i++ {
		if _, ok := p.local[i]; ok {
			continue
		}

		if _, ok := p.remote[i]; ok {
			continue
		}

		missing++
	}

	p.missing = missing
	if missing == 0 {
		select {
		case p.noneMissing <- true:
		default:
		}
	}
}
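
// ready returns true once every partition is available on at least one node,
// local or remote.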
func (p *partitions) ready() bool {
	p.lock.RLock()
	defer p.lock.RUnlock()

	return p.missing == 0
}

// advertiseAndWait advertises the partitions we have locally, and waits until
// it sees at least one peer for every remote partition. It returns false only
// if it was closed before that happens.
func (p *partitions) advertiseAndWait() bool {
	// Advertise that our local partitions are ready.
	p.advertisePartitions()

	for {
		p.lock.RLock()
		missing := p.missing
		p.lock.RUnlock()
		if missing == 0 {
			break
		}

		log.Printf("Waiting for all partitions of %s version %s to be available (missing %d)",
			p.db, p.version, missing)

		t := time.NewTimer(10 * time.Second)
		select {
		case <-t.C:
		case success := <-p.noneMissing:
			// If success is false, it's because close() was called before we
			// finished waiting on peers.
			return success
		}
	}

	return true
}

// advertisePartitions creates an ephemeral node for each partition this local
// peer is responsible for.
// TODO: this should maybe be a zk multi op?
func (p *partitions) advertisePartitions() {
	for partition := range p.local {
		p.zkWatcher.createEphemeral(p.partitionZKNode(partition))
	}
}
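
// unadvertisePartitions removes the ephemeral nodes created by
// advertisePartitions, so this peer no longer appears as a replica for its
// local partitions.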
func (p *partitions) unadvertisePartitions() {
	for partition := range p.local {
		p.zkWatcher.removeEphemeral(p.partitionZKNode(partition))
	}
}
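
// partitionZKNode returns the znode path under which this peer advertises the
// given partition, e.g. "partitions/mydb/v1/00042@10.0.0.1:9599" (the db,
// version, and address here are hypothetical).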
func (p *partitions) partitionZKNode(partition int) string {
	return path.Join(p.zkPath, fmt.Sprintf("%05d@%s", partition, p.peers.address))
}

// getPeers returns the list of peers who have the given partition available.
func (p *partitions) getPeers(partition int) []string {
	p.lock.RLock()
	defer p.lock.RUnlock()

	peers := make([]string, len(p.remote[partition]))
	copy(peers, p.remote[partition])
	return peers
}

// partitionId returns a string id for the given partition, to be used for the
// consistent hashing ring. It's not really meant to be unique, but it should be
// different for different versions with the same number of partitions, so that
// they don't shard identically.
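// For example, partition 3 of a (hypothetical) db "mydb" at version "v1" would
// hash as "partitions/mydb/v1:00003".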
func (p *partitions) partitionId(partition int) string {
	return fmt.Sprintf("%s:%05d", p.zkPath, partition)
}
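
// close removes the zookeeper watch on this version's partition assignments
// and withdraws this peer's ephemeral advertisements.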
func (p *partitions) close() {
	p.lock.Lock()
	defer p.lock.Unlock()

	p.zkWatcher.removeWatch(p.zkPath)
	p.unadvertisePartitions()
}