Skip to content

Commit

Permalink
cmd/go: abstract build cache, support implementations via child process
Browse files Browse the repository at this point in the history
Via setting GOCACHEPROG to a binary which speaks JSON over
stdin/stdout.

Updates golang#59719

Signed-off-by: Brad Fitzpatrick <[email protected]>
Change-Id: I824ff04d5ebdf0ba4d1b5bc2e9fbaee26d34c80f
  • Loading branch information
bradfitz committed Apr 21, 2023
1 parent 903a25a commit 337b68d
Show file tree
Hide file tree
Showing 8 changed files with 484 additions and 54 deletions.
96 changes: 74 additions & 22 deletions src/cmd/go/internal/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,50 @@ type ActionID [HashSize]byte
// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// Cache is the interface used by cmd/go to access the build cache.
type Cache interface {
// Get returns the cache entry for the provided ActionID.
// On a miss, the returned error should be of type *entryNotFoundError.
//
// After a successful call to Get, OutputFile(Entry.OutputID) must
// exist on disk until Close is called (at the end of the process).
Get(ActionID) (Entry, error)

// Put adds an item to the cache.
//
// The seeker is only used to seek to the beginning. After a call to Put,
// the seek position is not guaranteed to be in any particular state.
//
// As a special case, if the ReadSeeker is of type noVerifyReadSeeker,
// the verification from GODEBUG=goverifycache=1 is skipped.
//
// After a successful call to Put, OutputFile of the returned OutputID must
// exist on disk until Close is called (at the end of the process).
Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error)

// Close is called at the end of the go process. Implementations can do
// cache cleanup work at this phase, or wait for and report any errors from
// background cleanup work started earlier. Cache trimming in one process
// should not cause the invariants of this interface to be violated in
// another process. Namely, a cache trim from one process should not delete
// an OutputID from disk that was recently returned by Get or Put in
// another process. As a rule of thumb, don't trim things used in the last
// day.
Close() error

// OutputFile returns the path on disk where OutputID is stored.
//
// It's only called after a successful Get or Put call, so it doesn't need
// to return an error; it's assumed that if the previous Get or Put succeeded,
// the output is already on disk.
OutputFile(OutputID) string

// FuzzDir returns where fuzz files are stored.
FuzzDir() string
}

// A DiskCache is a package cache, backed by a file system directory tree.
type Cache struct {
type DiskCache struct {
dir string
now func() time.Time
}
Expand All @@ -48,7 +90,7 @@ type Cache struct {
// to share a cache directory (for example, if the directory were stored
// in a network file system). File locking is notoriously unreliable in
// network file systems and may not suffice to protect the cache.
func Open(dir string) (*Cache, error) {
func Open(dir string) (*DiskCache, error) {
info, err := os.Stat(dir)
if err != nil {
return nil, err
Expand All @@ -62,15 +104,15 @@ func Open(dir string) (*Cache, error) {
return nil, err
}
}
c := &Cache{
c := &DiskCache{
dir: dir,
now: time.Now,
}
return c, nil
}

// fileName returns the name of the file corresponding to the given id.
func (c *Cache) fileName(id [HashSize]byte, key string) string {
func (c *DiskCache) fileName(id [HashSize]byte, key string) string {
return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
}

Expand Down Expand Up @@ -136,7 +178,7 @@ func initEnv() {
// returning the corresponding output ID and file size, if any.
// Note that finding an output ID does not guarantee that the
// saved file for that output ID is still available.
func (c *Cache) Get(id ActionID) (Entry, error) {
func (c *DiskCache) Get(id ActionID) (Entry, error) {
if verify {
return Entry{}, &entryNotFoundError{Err: errVerifyMode}
}
Expand All @@ -150,7 +192,7 @@ type Entry struct {
}

// get is Get but does not respect verify mode, so that Put can use it.
func (c *Cache) get(id ActionID) (Entry, error) {
func (c *DiskCache) get(id ActionID) (Entry, error) {
missing := func(reason error) (Entry, error) {
return Entry{}, &entryNotFoundError{Err: reason}
}
Expand Down Expand Up @@ -214,7 +256,7 @@ func (c *Cache) get(id ActionID) (Entry, error) {

// GetFile looks up the action ID in the cache and returns
// the name of the corresponding data file.
func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
func GetFile(c Cache, id ActionID) (file string, entry Entry, err error) {
entry, err = c.Get(id)
if err != nil {
return "", Entry{}, err
Expand All @@ -233,7 +275,7 @@ func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
// GetBytes looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetBytes should only be used for data that can be expected to fit in memory.
func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
func GetBytes(c Cache, id ActionID) ([]byte, Entry, error) {
entry, err := c.Get(id)
if err != nil {
return nil, entry, err
Expand All @@ -248,7 +290,7 @@ func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
// GetMmap looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetMmap should only be used for data that can be expected to fit in memory.
func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
func GetMmap(c Cache, id ActionID) ([]byte, Entry, error) {
entry, err := c.Get(id)
if err != nil {
return nil, entry, err
Expand All @@ -264,7 +306,7 @@ func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
}

// OutputFile returns the name of the cache file storing output with the given OutputID.
func (c *Cache) OutputFile(out OutputID) string {
func (c *DiskCache) OutputFile(out OutputID) string {
file := c.fileName(out, "d")
c.used(file)
return file
Expand Down Expand Up @@ -297,16 +339,18 @@ const (
// mtime is more than an hour old. This heuristic eliminates
// nearly all of the mtime updates that would otherwise happen,
// while still keeping the mtimes useful for cache trimming.
func (c *Cache) used(file string) {
func (c *DiskCache) used(file string) {
info, err := os.Stat(file)
if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
return
}
os.Chtimes(file, c.now(), c.now())
}

// Close runs Trim and returns its error.
// It provides the Close method of the Cache interface for DiskCache.
func (c *DiskCache) Close() error { return c.Trim() }

// Trim removes old cache entries that are likely not to be reused.
func (c *Cache) Trim() error {
func (c *DiskCache) Trim() error {
now := c.now()

// We maintain in dir/trim.txt the time of the last completed cache trim.
Expand Down Expand Up @@ -346,7 +390,7 @@ func (c *Cache) Trim() error {
}

// trimSubdir trims a single cache subdirectory.
func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
func (c *DiskCache) trimSubdir(subdir string, cutoff time.Time) {
// Read all directory entries from subdir before removing
// any files, in case removing files invalidates the file offset
// in the directory scan. Also, ignore error from f.Readdirnames,
Expand Down Expand Up @@ -374,7 +418,7 @@ func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {

// putIndexEntry adds an entry to the cache recording that executing the action
// with the given id produces an output with the given output id (hash) and size.
func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
func (c *DiskCache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
// Note: We expect that for one reason or another it may happen
// that repeating an action produces a different output hash
// (for example, if the output contains a time stamp or temp dir name).
Expand Down Expand Up @@ -428,21 +472,29 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
return nil
}

// noVerifyReadSeeker is an io.ReadSeeker wrapper sentinel type
// that tells Cache.Put to skip the verify check
// (from GODEBUG=goverifycache=1).
type noVerifyReadSeeker struct {
io.ReadSeeker
}

// Put stores the given output in the cache as the output for the action ID.
// It may read file twice. The content of file must not change between the two passes.
func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
return c.put(id, file, true)
func (c *DiskCache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
_, isNoVerify := file.(noVerifyReadSeeker)
return c.put(id, file, !isNoVerify)
}

// PutNoVerify is like Put but disables the verify check
// when GODEBUG=goverifycache=1 is set.
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
// like test output containing times and the like.
func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
return c.put(id, file, false)
func PutNoVerify(c Cache, id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
return c.Put(id, noVerifyReadSeeker{file})
}

func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
func (c *DiskCache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
// Compute output ID.
h := sha256.New()
if _, err := file.Seek(0, 0); err != nil {
Expand All @@ -465,14 +517,14 @@ func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID
}

// PutBytes stores the given bytes in the cache as the output for the action ID.
func (c *Cache) PutBytes(id ActionID, data []byte) error {
func PutBytes(c Cache, id ActionID, data []byte) error {
_, _, err := c.Put(id, bytes.NewReader(data))
return err
}

// copyFile copies file into the cache, expecting it to have the given
// output ID and size, if that file is not present already.
func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
func (c *DiskCache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
name := c.fileName(out, "d")
info, err := os.Stat(name)
if err == nil && info.Size() == size {
Expand Down Expand Up @@ -562,6 +614,6 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
// They may be removed with 'go clean -fuzzcache'.
//
// TODO(#48526): make Trim remove unused files from this directory.
func (c *Cache) FuzzDir() string {
func (c *DiskCache) FuzzDir() string {
return filepath.Join(c.dir, "fuzz")
}
14 changes: 10 additions & 4 deletions src/cmd/go/internal/cache/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ import (

// Default returns the default cache to use.
// It never returns nil.
func Default() *Cache {
func Default() Cache {
defaultOnce.Do(initDefaultCache)
return defaultCache
}

var (
defaultOnce sync.Once
defaultCache *Cache
defaultCache Cache
)

// cacheREADME is a message stored in a README in the cache directory.
Expand Down Expand Up @@ -53,11 +53,17 @@ func initDefaultCache() {
os.WriteFile(filepath.Join(dir, "README"), []byte(cacheREADME), 0666)
}

c, err := Open(dir)
diskCache, err := Open(dir)
if err != nil {
base.Fatalf("failed to initialize build cache at %s: %s\n", dir, err)
}
defaultCache = c

if v := cfg.Getenv("GOCACHEPROG"); v != "" {
defaultCache = startCacheProg(v, diskCache)
return
} else {
defaultCache = diskCache
}
}

var (
Expand Down
Loading

0 comments on commit 337b68d

Please sign in to comment.