@@ -16,6 +16,7 @@ const (
16
16
coordinationVersion = "v1"
17
17
zkReconnectPeriod = 1 * time .Second
18
18
defaultZKPort = 2181
19
+ maxCreateRetries = 5
19
20
)
20
21
21
22
// defaultZkACL is the ACL handed to the Create calls in this file, for both
// the ephemeral node in hookCreateEphemeral and the nodes made by createAll.
var defaultZkACL = zk .WorldACL (zk .PERM_ALL )
@@ -234,9 +235,22 @@ func (w *zkWatcher) hookCreateEphemeral(node string) error {
234
235
w .RLock ()
235
236
defer w .RUnlock ()
236
237
237
- _ , err := w .conn .Create (node , "" , zk .EPHEMERAL , defaultZkACL )
238
- if err != nil {
239
- return err
238
+ // Retry a few times, in case the node is removed in between the two following
239
+ // steps.
240
+ for i := 0 ; i < maxCreateRetries ; i ++ {
241
+ _ , err := w .conn .Create (node , "" , zk .EPHEMERAL , defaultZkACL )
242
+ if err == nil {
243
+ break
244
+ } else if err != nil && ! isNoNode (err ) {
245
+ return err
246
+ }
247
+
248
+ // Create the parent nodes.
249
+ parent , _ := path .Split (node )
250
+ err = w .createAll (parent )
251
+ if err != nil {
252
+ return fmt .Errorf ("create %s: %s" , node , err )
253
+ }
240
254
}
241
255
242
256
return nil
@@ -279,7 +293,7 @@ func (w *zkWatcher) hookWatchChildren(node string, wn watchedNode) error {
279
293
w .RLock ()
280
294
defer w .RUnlock ()
281
295
282
- children , _ , events , err := w .conn . ChildrenW (node )
296
+ children , _ , events , err := w .childrenW (node )
283
297
if err != nil {
284
298
return err
285
299
}
@@ -317,7 +331,7 @@ func (w *zkWatcher) hookWatchChildren(node string, wn watchedNode) error {
317
331
}
318
332
319
333
w .RLock ()
320
- children , _ , events , err = w .conn . ChildrenW (node )
334
+ children , _ , events , err = w .childrenW (node )
321
335
w .RUnlock ()
322
336
323
337
if err != nil {
@@ -331,30 +345,36 @@ func (w *zkWatcher) hookWatchChildren(node string, wn watchedNode) error {
331
345
return nil
332
346
}
333
347
334
- // createPath creates a node and all its parents permanently.
335
- func (w * zkWatcher ) createPath (node string ) error {
336
- w .RLock ()
337
- defer w .RUnlock ()
348
+ func (w * zkWatcher ) childrenW (node string ) (children []string , stat * zk.Stat , events <- chan zk.Event , err error ) {
349
+ // Retry a few times, in case the node is removed in between the two following
350
+ // steps.
351
+ for i := 0 ; i < maxCreateRetries ; i ++ {
352
+ children , stat , events , err = w .conn .ChildrenW (node )
353
+ if ! isNoNode (err ) {
354
+ return
355
+ }
338
356
339
- node = path . Join ( w . prefix , node )
340
- err : = w .createAll (node )
341
- if err != nil {
342
- return fmt .Errorf ("create %s: %s" , node , err )
343
- } else {
344
- return err
357
+ // Create the node so we can watch it.
358
+ err = w .createAll (node )
359
+ if err != nil {
360
+ err = fmt .Errorf ("create %s: %s" , node , err )
361
+ return
362
+ }
345
363
}
364
+
365
+ return
346
366
}
347
367
348
- func (w * zkWatcher ) createAll (fullNode string ) error {
349
- base , _ := path .Split (path .Clean (fullNode ))
368
+ func (w * zkWatcher ) createAll (node string ) error {
369
+ base , _ := path .Split (path .Clean (node ))
350
370
if base != "" && base != "/" {
351
371
err := w .createAll (base )
352
372
if err != nil {
353
373
return err
354
374
}
355
375
}
356
376
357
- _ , err := w .conn .Create (path .Clean (fullNode ), "" , 0 , defaultZkACL )
377
+ _ , err := w .conn .Create (path .Clean (node ), "" , 0 , defaultZkACL )
358
378
if err != nil && ! isNodeExists (err ) {
359
379
return err
360
380
}
@@ -387,3 +407,11 @@ func isNodeExists(err error) bool {
387
407
388
408
return false
389
409
}
410
+
411
+ func isNoNode (err error ) bool {
412
+ if zkErr , ok := err .(* zk.Error ); ok && zkErr .Code == zk .ZNONODE {
413
+ return true
414
+ }
415
+
416
+ return false
417
+ }
0 commit comments