certmagic/filestorage.go
a 1a2275d54c
fs storage: Use temporary files when writing (#300)
* fix: use an tmp file to flush new certs to disk

* add readme
2024-08-04 13:37:03 -06:00

450 lines
14 KiB
Go

// Copyright 2015 Matthew Holt
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package certmagic
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"log"
"os"
"path"
"path/filepath"
"runtime"
"time"
"github.com/caddyserver/certmagic/internal/atomicfile"
)
// FileStorage facilitates forming file paths derived from a root
// directory. It is used to get file paths in a consistent,
// cross-platform way or persisting ACME assets on the file system.
// The presence of a lock file for a given key indicates a lock
// is held and is thus unavailable.
//
// Locks are created atomically by relying on the file system to
// enforce the O_EXCL flag. Acquirers that are forcefully terminated
// will not have a chance to clean up their locks before they exit,
// so locks may become stale. That is why, while a lock is actively
// held, the contents of the lockfile are updated with the current
// timestamp periodically. If another instance tries to acquire the
// lock but fails, it can see if the timestamp within is still fresh.
// If so, it patiently waits by polling occasionally. Otherwise,
// the stale lockfile is deleted, essentially forcing an unlock.
//
// While locking is atomic, unlocking is not perfectly atomic. File
// systems offer native atomic operations when creating files, but
// not necessarily when deleting them. It is theoretically possible
// for two instances to discover the same stale lock and both proceed
// to delete it, but if one instance is able to delete the lockfile
// and create a new one before the other one calls delete, then the
// new lock file created by the first instance will get deleted by
// mistake. This does mean that mutual exclusion is not guaranteed
// to be perfectly enforced in the presence of stale locks. One
// alternative is to lock the unlock operation by using ".unlock"
// files; and we did this for some time, but those files themselves
// may become stale, leading applications into infinite loops if
// they always expect the unlock file to be deleted by the instance
// that created it. We instead prefer the simpler solution that
// implies imperfect mutual exclusion if locks become stale, but
// that is probably less severe a consequence than infinite loops.
//
// See https://github.com/caddyserver/caddy/issues/4448 for discussion.
// See commit 468bfd25e452196b140148928cdd1f1a2285ae4b for where we
// switched away from using .unlock files.
type FileStorage struct {
Path string
}
// Exists returns true if key exists in s.
func (s *FileStorage) Exists(_ context.Context, key string) bool {
_, err := os.Stat(s.Filename(key))
return !errors.Is(err, fs.ErrNotExist)
}
// Store saves value at key.
func (s *FileStorage) Store(_ context.Context, key string, value []byte) error {
filename := s.Filename(key)
err := os.MkdirAll(filepath.Dir(filename), 0700)
if err != nil {
return err
}
fp, err := atomicfile.New(filename, 0o600)
if err != nil {
return err
}
_, err = fp.Write(value)
if err != nil {
// cancel the write
fp.Cancel()
return err
}
// close, thereby flushing the write
return fp.Close()
}
// Load retrieves the value at key.
func (s *FileStorage) Load(_ context.Context, key string) ([]byte, error) {
// i believe it's possible for the read call to error but still return bytes, in event of something like a shortread?
// therefore, i think it's appropriate to not return any bytes to avoid downstream users of the package erroniously believing that
// bytes read + error is a valid response (it should not be)
xs, err := os.ReadFile(s.Filename(key))
if err != nil {
return nil, err
}
return xs, nil
}
// Delete deletes the value at key.
func (s *FileStorage) Delete(_ context.Context, key string) error {
return os.RemoveAll(s.Filename(key))
}
// List returns all keys that match prefix.
func (s *FileStorage) List(ctx context.Context, prefix string, recursive bool) ([]string, error) {
var keys []string
walkPrefix := s.Filename(prefix)
err := filepath.Walk(walkPrefix, func(fpath string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info == nil {
return fmt.Errorf("%s: file info is nil", fpath)
}
if fpath == walkPrefix {
return nil
}
if ctxErr := ctx.Err(); ctxErr != nil {
return ctxErr
}
suffix, err := filepath.Rel(walkPrefix, fpath)
if err != nil {
return fmt.Errorf("%s: could not make path relative: %v", fpath, err)
}
keys = append(keys, path.Join(prefix, suffix))
if !recursive && info.IsDir() {
return filepath.SkipDir
}
return nil
})
return keys, err
}
// Stat returns information about key.
func (s *FileStorage) Stat(_ context.Context, key string) (KeyInfo, error) {
fi, err := os.Stat(s.Filename(key))
if err != nil {
return KeyInfo{}, err
}
return KeyInfo{
Key: key,
Modified: fi.ModTime(),
Size: fi.Size(),
IsTerminal: !fi.IsDir(),
}, nil
}
// Filename returns the key as a path on the file
// system prefixed by s.Path.
func (s *FileStorage) Filename(key string) string {
return filepath.Join(s.Path, filepath.FromSlash(key))
}
// Lock obtains a lock named by the given name. It blocks
// until the lock can be obtained or an error is returned.
func (s *FileStorage) Lock(ctx context.Context, name string) error {
filename := s.lockFilename(name)
// sometimes the lockfiles read as empty (size 0) - this is either a stale lock or it
// is currently being written; we can retry a few times in this case, as it has been
// shown to help (issue #232)
var emptyCount int
for {
err := createLockfile(filename)
if err == nil {
// got the lock, yay
return nil
}
if !os.IsExist(err) {
// unexpected error
return fmt.Errorf("creating lock file: %v", err)
}
// lock file already exists
var meta lockMeta
f, err := os.Open(filename)
if err == nil {
err2 := json.NewDecoder(f).Decode(&meta)
f.Close()
if errors.Is(err2, io.EOF) {
emptyCount++
if emptyCount < 8 {
// wait for brief time and retry; could be that the file is in the process
// of being written or updated (which involves truncating) - see issue #232
select {
case <-time.After(250 * time.Millisecond):
case <-ctx.Done():
return ctx.Err()
}
continue
} else {
// lockfile is empty or truncated multiple times; I *think* we can assume
// the previous acquirer either crashed or had some sort of failure that
// caused them to be unable to fully acquire or retain the lock, therefore
// we should treat it as if the lockfile did not exist
log.Printf("[INFO][%s] %s: Empty lockfile (%v) - likely previous process crashed or storage medium failure; treating as stale", s, filename, err2)
}
} else if err2 != nil {
return fmt.Errorf("decoding lockfile contents: %w", err2)
}
}
switch {
case os.IsNotExist(err):
// must have just been removed; try again to create it
continue
case err != nil:
// unexpected error
return fmt.Errorf("accessing lock file: %v", err)
case fileLockIsStale(meta):
// lock file is stale - delete it and try again to obtain lock
// (NOTE: locking becomes imperfect if lock files are stale; known solutions
// either have potential to cause infinite loops, as in caddyserver/caddy#4448,
// or must give up on perfect mutual exclusivity; however, these cases are rare,
// so we prefer the simpler solution that avoids infinite loops)
log.Printf("[INFO][%s] Lock for '%s' is stale (created: %s, last update: %s); removing then retrying: %s",
s, name, meta.Created, meta.Updated, filename)
if err = os.Remove(filename); err != nil { // hopefully we can replace the lock file quickly!
if !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("unable to delete stale lockfile; deadlocked: %w", err)
}
}
continue
default:
// lockfile exists and is not stale;
// just wait a moment and try again,
// or return if context cancelled
select {
case <-time.After(fileLockPollInterval):
case <-ctx.Done():
return ctx.Err()
}
}
}
}
// Unlock releases the lock for name.
func (s *FileStorage) Unlock(_ context.Context, name string) error {
return os.Remove(s.lockFilename(name))
}
func (s *FileStorage) String() string {
return "FileStorage:" + s.Path
}
func (s *FileStorage) lockFilename(name string) string {
return filepath.Join(s.lockDir(), StorageKeys.Safe(name)+".lock")
}
func (s *FileStorage) lockDir() string {
return filepath.Join(s.Path, "locks")
}
func fileLockIsStale(meta lockMeta) bool {
ref := meta.Updated
if ref.IsZero() {
ref = meta.Created
}
// since updates are exactly every lockFreshnessInterval,
// add a grace period for the actual file read+write to
// take place
return time.Since(ref) > lockFreshnessInterval*2
}
// createLockfile atomically creates the lockfile
// identified by filename. A successfully created
// lockfile should be removed with removeLockfile.
func createLockfile(filename string) error {
err := atomicallyCreateFile(filename, true)
if err != nil {
return err
}
go keepLockfileFresh(filename)
return nil
}
// keepLockfileFresh continuously updates the lock file
// at filename with the current timestamp. It stops
// when the file disappears (happy path = lock released),
// or when there is an error at any point. Since it polls
// every lockFreshnessInterval, this function might
// not terminate until up to lockFreshnessInterval after
// the lock is released.
func keepLockfileFresh(filename string) {
defer func() {
if err := recover(); err != nil {
buf := make([]byte, stackTraceBufferSize)
buf = buf[:runtime.Stack(buf, false)]
log.Printf("panic: active locking: %v\n%s", err, buf)
}
}()
for {
time.Sleep(lockFreshnessInterval)
done, err := updateLockfileFreshness(filename)
if err != nil {
log.Printf("[ERROR] Keeping lock file fresh: %v - terminating lock maintenance (lockfile: %s)", err, filename)
return
}
if done {
return
}
}
}
// updateLockfileFreshness updates the lock file at filename
// with the current timestamp. It returns true if the parent
// loop can terminate (i.e. no more need to update the lock).
func updateLockfileFreshness(filename string) (bool, error) {
f, err := os.OpenFile(filename, os.O_RDWR, 0644)
if os.IsNotExist(err) {
return true, nil // lock released
}
if err != nil {
return true, err
}
defer f.Close()
// read contents
metaBytes, err := io.ReadAll(io.LimitReader(f, 2048))
if err != nil {
return true, err
}
var meta lockMeta
if err := json.Unmarshal(metaBytes, &meta); err != nil {
// see issue #232: this can error if the file is empty,
// which happens sometimes when the disk is REALLY slow
return true, err
}
// truncate file and reset I/O offset to beginning
if err := f.Truncate(0); err != nil {
return true, err
}
if _, err := f.Seek(0, io.SeekStart); err != nil {
return true, err
}
// write updated timestamp
meta.Updated = time.Now()
if err = json.NewEncoder(f).Encode(meta); err != nil {
return false, err
}
// sync to device; we suspect that sometimes file systems
// (particularly AWS EFS) don't do this on their own,
// leaving the file empty when we close it; see
// https://github.com/caddyserver/caddy/issues/3954
return false, f.Sync()
}
// atomicallyCreateFile atomically creates the file
// identified by filename if it doesn't already exist.
func atomicallyCreateFile(filename string, writeLockInfo bool) error {
// no need to check this error, we only really care about the file creation error
_ = os.MkdirAll(filepath.Dir(filename), 0700)
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0644)
if err != nil {
return err
}
defer f.Close()
if writeLockInfo {
now := time.Now()
meta := lockMeta{
Created: now,
Updated: now,
}
if err := json.NewEncoder(f).Encode(meta); err != nil {
return err
}
// see https://github.com/caddyserver/caddy/issues/3954
if err := f.Sync(); err != nil {
return err
}
}
return nil
}
// homeDir returns the best guess of the current user's home
// directory from environment variables. If unknown, "." (the
// current directory) is returned instead.
func homeDir() string {
home := os.Getenv("HOME")
if home == "" && runtime.GOOS == "windows" {
drive := os.Getenv("HOMEDRIVE")
path := os.Getenv("HOMEPATH")
home = drive + path
if drive == "" || path == "" {
home = os.Getenv("USERPROFILE")
}
}
if home == "" {
home = "."
}
return home
}
func dataDir() string {
baseDir := filepath.Join(homeDir(), ".local", "share")
if xdgData := os.Getenv("XDG_DATA_HOME"); xdgData != "" {
baseDir = xdgData
}
return filepath.Join(baseDir, "certmagic")
}
// lockMeta is written into a lock file.
type lockMeta struct {
Created time.Time `json:"created,omitempty"`
Updated time.Time `json:"updated,omitempty"`
}
// lockFreshnessInterval is how often to update
// a lock's timestamp. Locks with a timestamp
// more than this duration in the past (plus a
// grace period for latency) can be considered
// stale.
const lockFreshnessInterval = 5 * time.Second
// fileLockPollInterval is how frequently
// to check the existence of a lock file
const fileLockPollInterval = 1 * time.Second
// Interface guard
var _ Storage = (*FileStorage)(nil)