Skip to content
This repository was archived by the owner on Mar 5, 2024. It is now read-only.

Commit 82580a2

Browse files
committed
Create parent znodes as part of watching or creating ephemeral nodes
The trick here is doing it in a way that stays safe if the node is deleted mid-process. To make that safe, we wrap the Create and ChildrenW calls in a retry loop.
1 parent a619a0a commit 82580a2

File tree

4 files changed

+46
-36
lines changed

4 files changed

+46
-36
lines changed

partitions.go

-5
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,6 @@ func watchPartitions(zkWatcher *zkWatcher, peers *peers, db, version string, num
4848
noneMissing: make(chan bool),
4949
}
5050

51-
// Create the partitions path we're going to watch, in case no one has done
52-
// that yet.
53-
p.zkPath = path.Join("partitions", db, version)
54-
zkWatcher.createPath(p.zkPath)
55-
5651
updates, _ := zkWatcher.watchChildren(p.zkPath)
5752
p.updateRemotePartitions(<-updates)
5853
go p.sync(updates)

peers.go

-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ func watchPeers(zkWatcher *zkWatcher, shardID, address string) *peers {
4242
resetConvergenceTimer: make(chan bool),
4343
}
4444

45-
zkWatcher.createPath("nodes")
4645
node := path.Join("nodes", fmt.Sprintf("%s@%s", p.shardID, p.address))
4746
zkWatcher.createEphemeral(node)
4847

zk_watcher.go

+46-18
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const (
1616
coordinationVersion = "v1"
1717
zkReconnectPeriod = 1 * time.Second
1818
defaultZKPort = 2181
19+
maxCreateRetries = 5
1920
)
2021

2122
var defaultZkACL = zk.WorldACL(zk.PERM_ALL)
@@ -234,9 +235,22 @@ func (w *zkWatcher) hookCreateEphemeral(node string) error {
234235
w.RLock()
235236
defer w.RUnlock()
236237

237-
_, err := w.conn.Create(node, "", zk.EPHEMERAL, defaultZkACL)
238-
if err != nil {
239-
return err
238+
// Retry a few times, in case the node is removed in between the two following
239+
// steps.
240+
for i := 0; i < maxCreateRetries; i++ {
241+
_, err := w.conn.Create(node, "", zk.EPHEMERAL, defaultZkACL)
242+
if err == nil {
243+
break
244+
} else if err != nil && !isNoNode(err) {
245+
return err
246+
}
247+
248+
// Create the parent nodes.
249+
parent, _ := path.Split(node)
250+
err = w.createAll(parent)
251+
if err != nil {
252+
return fmt.Errorf("create %s: %s", node, err)
253+
}
240254
}
241255

242256
return nil
@@ -279,7 +293,7 @@ func (w *zkWatcher) hookWatchChildren(node string, wn watchedNode) error {
279293
w.RLock()
280294
defer w.RUnlock()
281295

282-
children, _, events, err := w.conn.ChildrenW(node)
296+
children, _, events, err := w.childrenW(node)
283297
if err != nil {
284298
return err
285299
}
@@ -317,7 +331,7 @@ func (w *zkWatcher) hookWatchChildren(node string, wn watchedNode) error {
317331
}
318332

319333
w.RLock()
320-
children, _, events, err = w.conn.ChildrenW(node)
334+
children, _, events, err = w.childrenW(node)
321335
w.RUnlock()
322336

323337
if err != nil {
@@ -331,30 +345,36 @@ func (w *zkWatcher) hookWatchChildren(node string, wn watchedNode) error {
331345
return nil
332346
}
333347

334-
// createPath creates a node and all its parents permanently.
335-
func (w *zkWatcher) createPath(node string) error {
336-
w.RLock()
337-
defer w.RUnlock()
348+
func (w *zkWatcher) childrenW(node string) (children []string, stat *zk.Stat, events <-chan zk.Event, err error) {
349+
// Retry a few times, in case the node is removed in between the two following
350+
// steps.
351+
for i := 0; i < maxCreateRetries; i++ {
352+
children, stat, events, err = w.conn.ChildrenW(node)
353+
if !isNoNode(err) {
354+
return
355+
}
338356

339-
node = path.Join(w.prefix, node)
340-
err := w.createAll(node)
341-
if err != nil {
342-
return fmt.Errorf("create %s: %s", node, err)
343-
} else {
344-
return err
357+
// Create the node so we can watch it.
358+
err = w.createAll(node)
359+
if err != nil {
360+
err = fmt.Errorf("create %s: %s", node, err)
361+
return
362+
}
345363
}
364+
365+
return
346366
}
347367

348-
func (w *zkWatcher) createAll(fullNode string) error {
349-
base, _ := path.Split(path.Clean(fullNode))
368+
func (w *zkWatcher) createAll(node string) error {
369+
base, _ := path.Split(path.Clean(node))
350370
if base != "" && base != "/" {
351371
err := w.createAll(base)
352372
if err != nil {
353373
return err
354374
}
355375
}
356376

357-
_, err := w.conn.Create(path.Clean(fullNode), "", 0, defaultZkACL)
377+
_, err := w.conn.Create(path.Clean(node), "", 0, defaultZkACL)
358378
if err != nil && !isNodeExists(err) {
359379
return err
360380
}
@@ -387,3 +407,11 @@ func isNodeExists(err error) bool {
387407

388408
return false
389409
}
410+
411+
func isNoNode(err error) bool {
412+
if zkErr, ok := err.(*zk.Error); ok && zkErr.Code == zk.ZNONODE {
413+
return true
414+
}
415+
416+
return false
417+
}

zk_watcher_test.go

-12
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,6 @@ func TestZKWatcher(t *testing.T) {
103103
defer w.close()
104104
defer tzk.close()
105105

106-
err := w.createPath("/foo")
107-
require.NoError(t, err, "createPath should work")
108-
109106
updates, _ := w.watchChildren("/foo")
110107
go func() {
111108
w.createEphemeral("/foo/bar")
@@ -123,9 +120,6 @@ func TestZKWatcherReconnect(t *testing.T) {
123120
defer w.close()
124121
defer tzk.close()
125122

126-
err := w.createPath("/foo")
127-
require.NoError(t, err, "createPath should work")
128-
129123
updates, _ := w.watchChildren("/foo")
130124
go func() {
131125
w.createEphemeral("/foo/bar")
@@ -144,9 +138,6 @@ func TestZKWatchesCanceled(t *testing.T) {
144138
defer w.close()
145139
defer tzk.close()
146140

147-
err := w.createPath("/foo")
148-
require.NoError(t, err, "createPath should work")
149-
150141
w.watchChildren("/foo")
151142

152143
for i := 0; i < 3; i++ {
@@ -161,9 +152,6 @@ func TestZKRemoveWatch(t *testing.T) {
161152
defer w.close()
162153
defer tzk.close()
163154

164-
err := w.createPath("/foo")
165-
require.NoError(t, err, "createPath should work")
166-
167155
updates, disconnected := w.watchChildren("/foo")
168156

169157
w.createEphemeral("/foo/bar")

0 commit comments

Comments
 (0)