-
Notifications
You must be signed in to change notification settings - Fork 159
/
Copy pathmaintner.go
426 lines (381 loc) · 11.7 KB
/
maintner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package maintner mirrors, searches, syncs, and serves Git, GitHub,
// and Gerrit metadata.
//
// Maintner is short for "Maintainer". This package is intended for
// use by many tools. The name of the daemon that serves the maintner
// data to other tools is "maintnerd".
package maintner
import (
"context"
"errors"
"fmt"
"log"
"regexp"
"sync"
"time"
"golang.org/x/build/maintner/maintpb"
"golang.org/x/sync/errgroup"
"golang.org/x/time/rate"
)
// Corpus holds all of a project's metadata.
//
// The fields declared before mu are not guarded by it. The fields
// declared after mu are guarded by it; external readers take the read
// lock through RLock and RUnlock.
type Corpus struct {
mutationLogger MutationLogger // non-nil when this is a self-updating corpus
mutationSource MutationSource // from Initialize
// verbose is set by SetVerbose; when true, addMutation logs each mutation.
verbose bool
// dataDir is the scratch directory passed to EnableLeaderMode; it is
// read through getDataDir.
dataDir string
// sawErrSplit records that Update or UpdateWithLocker returned ErrSplit.
sawErrSplit bool
mu sync.RWMutex // guards all following fields
// corpus state:
didInit bool // true after Initialize completes successfully
// debug is set by SetDebug and enables debugf output.
debug bool
strIntern map[string]string // interned strings, including binary githashes
// pubsub:
activityChans map[string]chan struct{} // keyed by topic
// github- and gerrit-specific:
// github and gerrit may be nil; the GitHub and Gerrit accessors return
// a newly allocated empty value in that case.
github *GitHub
gerrit *Gerrit
watchedGithubRepos []watchedGithubRepo
watchedGerritRepos []watchedGerritRepo
githubLimiter *rate.Limiter
// git-specific:
lastGitCount time.Time // last time of log spam about loading status
// pollGitDirs lists the git directories registered by TrackGoGitRepo.
pollGitDirs []polledGitCommits
gitPeople map[string]*GitPerson
// gitCommit maps a hash to its commit. Check verifies that each key
// equals the commit's Hash and that every parent is also present.
gitCommit map[GitHash]*GitCommit
gitCommitTodo map[GitHash]bool // -> true
gitOfHg map[string]GitHash // hg hex hash -> git hash
zoneCache map[string]*time.Location // "+0530" => location
}
// RLock acquires the corpus's read lock. While the read lock is held,
// no concurrent writer can mutate the corpus. Taking it is only
// required when the application queries the corpus while also calling
// its Update method concurrently.
func (c *Corpus) RLock() {
	c.mu.RLock()
}
// RUnlock releases the corpus's read lock taken with RLock.
func (c *Corpus) RUnlock() {
	c.mu.RUnlock()
}
// polledGitCommits pairs a git repo description with the on-disk
// directory that TrackGoGitRepo registered for it.
type polledGitCommits struct {
// repo describes the repo; TrackGoGitRepo sets only its GoRepo name.
repo *maintpb.GitRepo
// dir is the path on disk of the git directory.
dir string
}
// EnableLeaderMode configures c to act as the leader by installing the
// mutation logger and the scratch directory. Only the maintnerd
// process should call it.
//
// Git checkouts are stored under the provided scratchDir.
func (c *Corpus) EnableLeaderMode(logger MutationLogger, scratchDir string) {
	c.dataDir, c.mutationLogger = scratchDir, logger
}
// SetVerbose turns verbose logging on (v == true) or off (v == false).
func (c *Corpus) SetVerbose(v bool) {
	c.verbose = v
}
// getDataDir returns the scratch directory configured by
// EnableLeaderMode. It panics if no directory has been set.
func (c *Corpus) getDataDir() string {
	if dir := c.dataDir; dir != "" {
		return dir
	}
	panic("getDataDir called before Corpus.EnableLeaderMode")
}
// GitHub returns the corpus's github data. The result is never nil:
// when the corpus has no github data, a newly allocated empty GitHub
// is returned instead.
func (c *Corpus) GitHub() *GitHub {
	gh := c.github
	if gh == nil {
		gh = new(GitHub)
	}
	return gh
}
// Gerrit returns the corpus's Gerrit data. The result is never nil:
// when the corpus has no Gerrit data, a newly allocated empty Gerrit
// is returned instead.
func (c *Corpus) Gerrit() *Gerrit {
	g := c.gerrit
	if g == nil {
		g = new(Gerrit)
	}
	return g
}
// Check validates the internal consistency of the Corpus data
// structures and returns the first problem found, or nil. It is meant
// for tests and debugging.
//
// It runs the Gerrit check and then verifies that every git commit has
// a real (non-placeholder) committer, is stored under its own hash, and
// has all of its parents present in the corpus.
func (c *Corpus) Check() error {
	if gerritErr := c.Gerrit().check(); gerritErr != nil {
		return fmt.Errorf("gerrit: %v", gerritErr)
	}
	for key, commit := range c.gitCommit {
		switch {
		case commit.Committer == placeholderCommitter:
			return fmt.Errorf("corpus git commit %v has placeholder committer", key)
		case commit.Hash != key:
			return fmt.Errorf("git commit for key %q had GitCommit.Hash %q", key, commit.Hash)
		}
		for _, parent := range commit.Parents {
			if _, known := c.gitCommit[parent.Hash]; known {
				continue
			}
			return fmt.Errorf("git commit %q exists but its parent %q does not", commit.Hash, parent.Hash)
		}
	}
	return nil
}
// str returns the interned copy of s. If s has not been seen before it
// is added to the intern table (which is allocated on first use) and s
// itself is returned.
//
// Requires c.mu be held for writing.
func (c *Corpus) str(s string) string {
	interned, found := c.strIntern[s]
	if found {
		return interned
	}
	if c.strIntern == nil {
		c.strIntern = map[string]string{}
	}
	c.strIntern[s] = s
	return s
}
// strb is the []byte counterpart of str: it returns the interned
// string whose contents equal b, interning a new string through str
// when none exists yet.
//
// The table is consulted directly with string(b) as the map key first,
// so an already-interned value is returned without going through str.
// Because it may fall through to str, c.mu must be held for writing.
func (c *Corpus) strb(b []byte) string {
	interned, found := c.strIntern[string(b)]
	if !found {
		interned = c.str(string(b))
	}
	return interned
}
// SetDebug switches on debug logging, which makes debugf print its
// messages. This method only ever turns debugging on.
func (c *Corpus) SetDebug() {
	c.debug = true
}
// debugf logs a formatted message through the standard logger, but
// only when debugging has been enabled with SetDebug; otherwise it
// does nothing.
func (c *Corpus) debugf(format string, v ...interface{}) {
	if !c.debug {
		return
	}
	log.Printf(format, v...)
}
// gerritProjNameRx is the pattern describing a Gerrit project name:
// one or more lowercase ASCII letters or digits, followed by any number
// of lowercase ASCII letters, digits, hyphens, or underscores.
// TrackGoGitRepo also uses it to validate its goRepo argument.
// TODO: figure out if this is accurate.
var gerritProjNameRx = regexp.MustCompile(`^[a-z0-9]+[a-z0-9\-\_]*$`)
// TrackGoGitRepo registers a git directory whose metadata should be
// slurped into the corpus. goRepo is a name like "go" or "net"; dir is
// a path on disk.
//
// It panics if the corpus is not in leader mode (no mutation logger is
// set) or if goRepo does not match gerritProjNameRx.
func (c *Corpus) TrackGoGitRepo(goRepo, dir string) {
	switch {
	case c.mutationLogger == nil:
		panic("can't TrackGoGitRepo in non-leader mode")
	case !gerritProjNameRx.MatchString(goRepo):
		panic(fmt.Sprintf("bogus goRepo value %q", goRepo))
	}

	polled := polledGitCommits{
		repo: &maintpb.GitRepo{GoRepo: goRepo},
		dir:  dir,
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.pollGitDirs = append(c.pollGitDirs, polled)
}
// A MutationSource yields a log of mutations that will catch a corpus
// back up to the present.
//
// A Corpus is given its MutationSource through Initialize and reads
// from it again on every Update or UpdateWithLocker call.
type MutationSource interface {
// GetMutations returns a channel of mutations or related events.
// The channel will never be closed.
// All sends on the returned channel should select
// on the provided context.
GetMutations(context.Context) <-chan MutationStreamEvent
}
// MutationStreamEvent represents one of three possible events while
// reading mutations from disk or another source.
// An event is either a mutation, an error, or reaching the current
// end of the log. Exactly one of the three fields will be non-zero.
type MutationStreamEvent struct {
// Mutation, if non-nil, is the next mutation to apply to the corpus.
Mutation *maintpb.Mutation
// Err is a fatal error reading the log. No other events will
// follow an Err.
Err error
// End, if true, means that all mutations have been sent and
// the next event might take some time to arrive (it might not
// have occurred yet). The End event is not a terminal state
// like Err. There may be multiple Ends.
End bool
}
// Initialize fills the Corpus with the data read from src and returns
// once the corpus is up-to-date. Use the Update method afterwards to
// bring it up to date incrementally.
//
// Initialize may be called only once per Corpus; a second call panics.
func (c *Corpus) Initialize(ctx context.Context, src MutationSource) error {
	if prev := c.mutationSource; prev != nil {
		panic("duplicate call to Initialize")
	}
	c.mutationSource = src

	log.Printf("Loading data from log %T ...", src)
	return c.update(ctx, nil)
}
// ErrSplit is returned when the client notices the leader's
// mutation log has changed. This can happen if the leader restarts
// with uncommitted transactions. (The leader only commits mutations
// periodically.)
//
// Once Update or UpdateWithLocker has returned ErrSplit, calling either
// of them again on the same Corpus panics.
var ErrSplit = errors.New("maintner: leader server's history split, process out of sync")
// Update brings the corpus from its current state up to the latest
// state available from the MutationSource that was given to
// Initialize. It blocks until there is either a new change or the
// context expires.
// Once Update has returned ErrSplit the corpus can no longer be
// updated; calling Update again panics, as does calling it before
// Initialize.
//
// Update must not run concurrently with any other Update call. Code
// that reads the corpus while it is being updated must hold the read
// lock, taken with Corpus.RLock.
func (c *Corpus) Update(ctx context.Context) error {
	switch {
	case c.mutationSource == nil:
		panic("Update called without call to Initialize")
	case c.sawErrSplit:
		panic("Update called after previous call returned ErrSplit")
	}

	log.Printf("Updating data from log %T ...", c.mutationSource)
	updateErr := c.update(ctx, nil)
	if updateErr == ErrSplit {
		c.sawErrSplit = true
	}
	return updateErr
}
// UpdateWithLocker works exactly like Update, except that lk is held
// while each mutation event is processed. Like Update, it panics when
// called before Initialize or after a previous call returned ErrSplit.
func (c *Corpus) UpdateWithLocker(ctx context.Context, lk sync.Locker) error {
	switch {
	case c.mutationSource == nil:
		panic("UpdateWithLocker called without call to Initialize")
	case c.sawErrSplit:
		panic("UpdateWithLocker called after previous call returned ErrSplit")
	}

	log.Printf("Updating data from log %T ...", c.mutationSource)
	updateErr := c.update(ctx, lk)
	if updateErr == ErrSplit {
		c.sawErrSplit = true
	}
	return updateErr
}
// noopLocker is a locker whose Lock and Unlock methods do nothing.
// update substitutes it when its caller supplies no locker.
type noopLocker struct{}

// Lock does nothing.
func (noopLocker) Lock() {}

// Unlock does nothing.
func (noopLocker) Unlock() {}
// update drains events from the corpus's mutation source, applying each
// mutation, until the source reports End (success), reports an error,
// or ctx is done. c.mu is held for writing for the whole call.
//
// lk optionally specifies a locker to hold while each event is
// processed; a nil lk means no extra locking.
func (c *Corpus) update(ctx context.Context, lk sync.Locker) error {
	src := c.mutationSource
	events := src.GetMutations(ctx)
	cancelled := ctx.Done()

	c.mu.Lock()
	defer c.mu.Unlock()

	if lk == nil {
		lk = noopLocker{}
	}

	for {
		var ev MutationStreamEvent
		select {
		case <-cancelled:
			ctxErr := ctx.Err()
			log.Printf("Context expired while loading data from log %T: %v", src, ctxErr)
			return ctxErr
		case ev = <-events:
		}

		switch {
		case ev.Err != nil:
			log.Printf("Corpus GetMutations: %v", ev.Err)
			return ev.Err
		case ev.End:
			// Caught up with the log: fix up invariants before
			// handing the corpus back to the caller.
			c.didInit = true
			lk.Lock()
			c.finishProcessing()
			lk.Unlock()
			log.Printf("Reloaded data from log %T.", src)
			return nil
		default:
			lk.Lock()
			c.processMutationLocked(ev.Mutation)
			lk.Unlock()
		}
	}
}
// addMutation applies m to the in-memory corpus right away and then,
// if a mutation logger is configured, appends m to the mutation log.
// A failure to log the mutation is fatal to the process.
func (c *Corpus) addMutation(m *maintpb.Mutation) {
	if c.verbose {
		log.Printf("mutation: %v", m)
	}

	c.mu.Lock()
	c.processMutationLocked(m)
	c.finishProcessing()
	c.mu.Unlock()

	logger := c.mutationLogger
	if logger == nil {
		return
	}
	if err := logger.Log(m); err != nil {
		// TODO: handle errors better? failing is only safe option.
		log.Fatalf("could not log mutation %v: %v\n", m, err)
	}
}
// processMutationLocked dispatches each populated part of m (GitHub
// issue, GitHub, Git, Gerrit, in that order) to its matching handler.
//
// c.mu must be held.
func (c *Corpus) processMutationLocked(m *maintpb.Mutation) {
	if m.GithubIssue != nil {
		c.processGithubIssueMutation(m.GithubIssue)
	}
	if m.Github != nil {
		c.processGithubMutation(m.Github)
	}
	if m.Git != nil {
		c.processGitMutation(m.Git)
	}
	if m.Gerrit != nil {
		c.processGerritMutation(m.Gerrit)
	}
}
// finishProcessing repairs invariants and data structures before the
// Corpus is handed back from the Update loop to the user. At present
// it only delegates to the Gerrit data's own finishProcessing.
//
// c.mu must be held.
func (c *Corpus) finishProcessing() {
	gerrit := c.gerrit
	gerrit.finishProcessing()
}
// SyncLoop keeps the corpus up to date as its tracked sources change.
// It runs forever, returning only on an error or when the context
// expires.
func (c *Corpus) SyncLoop(ctx context.Context) error {
	const loop = true
	return c.sync(ctx, loop)
}
// Sync performs a single, non-looping update of the corpus from its
// tracked sources.
func (c *Corpus) Sync(ctx context.Context) error {
	const loop = false
	return c.sync(ctx, loop)
}
// sync polls every tracked source (watched GitHub repos, polled git
// directories, and watched Gerrit projects), one goroutine per source,
// and returns the first non-nil error reported by any of them.
//
// When loop is true, a source whose sync attempt ends with an error
// that isTempErr considers temporary is retried after a 30 second
// pause. Retrying stops as soon as ctx is done, which includes the
// group's derived context being canceled because a sibling source
// returned an error. Without that, the retry loops would keep sleeping
// and retrying forever and group.Wait would never return.
//
// sync refuses to run on a Corpus whose mutation source is a
// *netMutSource and returns an error instead.
func (c *Corpus) sync(ctx context.Context, loop bool) error {
	if _, ok := c.mutationSource.(*netMutSource); ok {
		return errors.New("maintner: can't run Corpus.Sync on a Corpus using NetworkMutationSource (did you mean Update?)")
	}
	group, ctx := errgroup.WithContext(ctx)

	const retryDelay = 30 * time.Second

	// shouldRetry reports whether a sync attempt that ended with err
	// should be tried again. Nothing is retried once ctx is done.
	shouldRetry := func(err error) bool {
		return loop && ctx.Err() == nil && isTempErr(err)
	}

	// pause waits for retryDelay. It returns early with ctx's error if
	// ctx is done before the delay has elapsed.
	pause := func() error {
		timer := time.NewTimer(retryDelay)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	for _, w := range c.watchedGithubRepos {
		gr, token := w.gr, w.token
		group.Go(func() error {
			log.Printf("Polling %v ...", gr.id)
			for {
				err := gr.sync(ctx, token, loop)
				if shouldRetry(err) {
					log.Printf("Temporary error from github %v: %v", gr.ID(), err)
					if waitErr := pause(); waitErr != nil {
						log.Printf("github sync ending for %v: %v", gr.ID(), waitErr)
						return waitErr
					}
					continue
				}
				log.Printf("github sync ending for %v: %v", gr.ID(), err)
				return err
			}
		})
	}
	for _, rp := range c.pollGitDirs {
		rp := rp // per-iteration copy for the closure below
		group.Go(func() error {
			for {
				err := c.syncGitCommits(ctx, rp, loop)
				if shouldRetry(err) {
					log.Printf("Temporary error from git repo %v: %v", rp.dir, err)
					if waitErr := pause(); waitErr != nil {
						log.Printf("git sync ending for %v: %v", rp.dir, waitErr)
						return waitErr
					}
					continue
				}
				log.Printf("git sync ending for %v: %v", rp.dir, err)
				return err
			}
		})
	}
	for _, w := range c.watchedGerritRepos {
		gp := w.project
		group.Go(func() error {
			log.Printf("Polling gerrit %v ...", gp.proj)
			for {
				err := gp.sync(ctx, loop)
				if shouldRetry(err) {
					log.Printf("Temporary error from gerrit %v: %v", gp.proj, err)
					if waitErr := pause(); waitErr != nil {
						log.Printf("gerrit sync ending for %v: %v", gp.proj, waitErr)
						return waitErr
					}
					continue
				}
				log.Printf("gerrit sync ending for %v: %v", gp.proj, err)
				return err
			}
		})
	}
	return group.Wait()
}
// isTempErr reports whether err should be treated as temporary by the
// retry loops in sync. For now it logs the error's type and value and
// answers true for every input, including a nil error.
func isTempErr(err error) bool {
	const temporary = true
	log.Printf("IS TEMP ERROR? %T %v", err, err)
	return temporary
}