// Copyright 2015 Matthew Holt // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package certmagic import ( "bytes" "context" "crypto" "crypto/rand" "crypto/tls" "crypto/x509" "crypto/x509/pkix" "encoding/asn1" "encoding/json" "encoding/pem" "errors" "fmt" "io/fs" weakrand "math/rand" "net" "net/http" "net/url" "strings" "time" "github.com/mholt/acmez/v2" "github.com/mholt/acmez/v2/acme" "go.uber.org/zap" "golang.org/x/crypto/ocsp" "golang.org/x/net/idna" ) // Config configures a certificate manager instance. // An empty Config is not valid: use New() to obtain // a valid Config. type Config struct { // How much of a certificate's lifetime becomes the // renewal window, which is the span of time at the // end of the certificate's validity period in which // it should be renewed; for most certificates, the // global default is good, but for extremely short- // lived certs, you may want to raise this to ~0.5. // Ratio is remaining:total lifetime. RenewalWindowRatio float64 // An optional event callback clients can set // to subscribe to certain things happening // internally by this config; invocations are // synchronous, so make them return quickly! // Functions should honor context cancellation. // // An error should only be returned to advise // the emitter to abort or cancel an upcoming // event. Some events, especially those that have // already happened, cannot be aborted. For example, // cert_obtaining can be canceled, but // cert_obtained cannot. Emitters may choose to // ignore returned errors. OnEvent func(ctx context.Context, event string, data map[string]any) error // DefaultServerName specifies a server name // to use when choosing a certificate if the // ClientHello's ServerName field is empty. DefaultServerName string // FallbackServerName specifies a server name // to use when choosing a certificate if the // ClientHello's ServerName field doesn't match // any available certificate. // EXPERIMENTAL: Subject to change or removal. FallbackServerName string // The state needed to operate on-demand TLS; // if non-nil, on-demand TLS is enabled and // certificate operations are deferred to // TLS handshakes (or as-needed). // TODO: Can we call this feature "Reactive/Lazy/Passive TLS" instead? OnDemand *OnDemandConfig // Adds the must staple TLS extension to the CSR. MustStaple bool // Sources for getting new, managed certificates; // the default Issuer is ACMEIssuer. If multiple // issuers are specified, they will be tried in // turn until one succeeds. Issuers []Issuer // How to select which issuer to use. // Default: UseFirstIssuer (subject to change). IssuerPolicy IssuerPolicy // If true, private keys already existing in storage // will be reused. Otherwise, a new key will be // created for every new certificate to mitigate // pinning and reduce the scope of key compromise. // Default: false (do not reuse keys). ReusePrivateKeys bool // The source of new private keys for certificates; // the default KeySource is StandardKeyGenerator. KeySource KeyGenerator // CertSelection chooses one of the certificates // with which the ClientHello will be completed; // if not set, DefaultCertificateSelector will // be used. CertSelection CertificateSelector // OCSP configures how OCSP is handled. By default, // OCSP responses are fetched for every certificate // with a responder URL, and cached on disk. Changing // these defaults is STRONGLY discouraged unless you // have a compelling reason to put clients at greater // risk and reduce their privacy. OCSP OCSPConfig // The storage to access when storing or loading // TLS assets. Default is the local file system. Storage Storage // CertMagic will verify the storage configuration // is acceptable before obtaining a certificate // to avoid information loss after an expensive // operation. If you are absolutely 100% sure your // storage is properly configured and has sufficient // space, you can disable this check to reduce I/O // if that is expensive for you. // EXPERIMENTAL: Subject to change or removal. DisableStorageCheck bool // SubjectTransformer is a hook that can transform the // subject (SAN) of a certificate being loaded or issued. // For example, a common use case is to replace the // left-most label with an asterisk (*) to become a // wildcard certificate. // EXPERIMENTAL: Subject to change or removal. SubjectTransformer func(ctx context.Context, domain string) string // Disables both ARI fetching and the use of ARI for renewal decisions. // TEMPORARY: Will likely be removed in the future. DisableARI bool // Set a logger to enable logging. If not set, // a default logger will be created. Logger *zap.Logger // required pointer to the in-memory cert cache certCache *Cache } // NewDefault makes a valid config based on the package // Default config. Most users will call this function // instead of New() since most use cases require only a // single config for any and all certificates. // // If your requirements are more advanced (for example, // multiple configs depending on the certificate), then use // New() instead. (You will need to make your own Cache // first.) If you only need a single Config to manage your // certs (even if that config changes, as long as it is the // only one), customize the Default package variable before // calling NewDefault(). // // All calls to NewDefault() will return configs that use the // same, default certificate cache. All configs returned // by NewDefault() are based on the values of the fields of // Default at the time it is called. // // This is the only way to get a config that uses the // default certificate cache. func NewDefault() *Config { defaultCacheMu.Lock() if defaultCache == nil { defaultCache = NewCache(CacheOptions{ // the cache will likely need to renew certificates, // so it will need to know how to do that, which // depends on the certificate being managed and which // can change during the lifetime of the cache; this // callback makes it possible to get the latest and // correct config with which to manage the cert, // but if the user does not provide one, we can only // assume that we are to use the default config GetConfigForCert: func(Certificate) (*Config, error) { return NewDefault(), nil }, Logger: Default.Logger, }) } certCache := defaultCache defaultCacheMu.Unlock() return newWithCache(certCache, Default) } // New makes a new, valid config based on cfg and // uses the provided certificate cache. certCache // MUST NOT be nil or this function will panic. // // Use this method when you have an advanced use case // that requires a custom certificate cache and config // that may differ from the Default. For example, if // not all certificates are managed/renewed the same // way, you need to make your own Cache value with a // GetConfigForCert callback that returns the correct // configuration for each certificate. However, for // the vast majority of cases, there will be only a // single Config, thus the default cache (which always // uses the default Config) and default config will // suffice, and you should use NewDefault() instead. func New(certCache *Cache, cfg Config) *Config { if certCache == nil { panic("a certificate cache is required") } certCache.optionsMu.RLock() getConfigForCert := certCache.options.GetConfigForCert defer certCache.optionsMu.RUnlock() if getConfigForCert == nil { panic("cache must have GetConfigForCert set in its options") } return newWithCache(certCache, cfg) } // newWithCache ensures that cfg is a valid config by populating // zero-value fields from the Default Config. If certCache is // nil, this function panics. func newWithCache(certCache *Cache, cfg Config) *Config { if certCache == nil { panic("cannot make a valid config without a pointer to a certificate cache") } if cfg.OnDemand == nil { cfg.OnDemand = Default.OnDemand } if !cfg.MustStaple { cfg.MustStaple = Default.MustStaple } if cfg.Issuers == nil { cfg.Issuers = Default.Issuers if cfg.Issuers == nil { // at least one issuer is absolutely required if not nil cfg.Issuers = []Issuer{NewACMEIssuer(&cfg, DefaultACME)} } } if cfg.RenewalWindowRatio == 0 { cfg.RenewalWindowRatio = Default.RenewalWindowRatio } if cfg.OnEvent == nil { cfg.OnEvent = Default.OnEvent } if cfg.KeySource == nil { cfg.KeySource = Default.KeySource } if cfg.DefaultServerName == "" { cfg.DefaultServerName = Default.DefaultServerName } if cfg.FallbackServerName == "" { cfg.FallbackServerName = Default.FallbackServerName } if cfg.Storage == nil { cfg.Storage = Default.Storage } if cfg.Logger == nil { cfg.Logger = Default.Logger } // absolutely don't allow a nil storage, // because that would make almost anything // a config can do pointless if cfg.Storage == nil { cfg.Storage = defaultFileStorage } // absolutely don't allow a nil logger either, // because that would result in panics if cfg.Logger == nil { cfg.Logger = defaultLogger } cfg.certCache = certCache return &cfg } // ManageSync causes the certificates for domainNames to be managed // according to cfg. If cfg.OnDemand is not nil, then this simply // allowlists the domain names and defers the certificate operations // to when they are needed. Otherwise, the certificates for each // name are loaded from storage or obtained from the CA if not already // in the cache associated with the Config. If loaded from storage, // they are renewed if they are expiring or expired. It then caches // the certificate in memory and is prepared to serve them up during // TLS handshakes. To change how an already-loaded certificate is // managed, update the cache options relating to getting a config for // a cert. // // Note that name allowlisting for on-demand management only takes // effect if cfg.OnDemand.DecisionFunc is not set (is nil); it will // not overwrite an existing DecisionFunc, nor will it overwrite // its decision; i.e. the implicit allowlist is only used if no // DecisionFunc is set. // // This method is synchronous, meaning that certificates for all // domainNames must be successfully obtained (or renewed) before // it returns. It returns immediately on the first error for any // of the given domainNames. This behavior is recommended for // interactive use (i.e. when an administrator is present) so // that errors can be reported and fixed immediately. func (cfg *Config) ManageSync(ctx context.Context, domainNames []string) error { return cfg.manageAll(ctx, domainNames, false) } // ManageAsync is the same as ManageSync, except that ACME // operations are performed asynchronously (in the background). // This method returns before certificates are ready. It is // crucial that the administrator monitors the logs and is // notified of any errors so that corrective action can be // taken as soon as possible. Any errors returned from this // method occurred before ACME transactions started. // // As long as logs are monitored, this method is typically // recommended for non-interactive environments. // // If there are failures loading, obtaining, or renewing a // certificate, it will be retried with exponential backoff // for up to about 30 days, with a maximum interval of about // 24 hours. Cancelling ctx will cancel retries and shut down // any goroutines spawned by ManageAsync. func (cfg *Config) ManageAsync(ctx context.Context, domainNames []string) error { return cfg.manageAll(ctx, domainNames, true) } // ClientCredentials returns a list of TLS client certificate chains for the given identifiers. // The return value can be used in a tls.Config to enable client authentication using managed certificates. // Any certificates that need to be obtained or renewed for these identifiers will be managed accordingly. func (cfg *Config) ClientCredentials(ctx context.Context, identifiers []string) ([]tls.Certificate, error) { err := cfg.manageAll(ctx, identifiers, false) if err != nil { return nil, err } var chains []tls.Certificate for _, id := range identifiers { certRes, err := cfg.loadCertResourceAnyIssuer(ctx, id) if err != nil { return chains, err } chain, err := tls.X509KeyPair(certRes.CertificatePEM, certRes.PrivateKeyPEM) if err != nil { return chains, err } chains = append(chains, chain) } return chains, nil } func (cfg *Config) manageAll(ctx context.Context, domainNames []string, async bool) error { if ctx == nil { ctx = context.Background() } if cfg.OnDemand != nil && cfg.OnDemand.hostAllowlist == nil { cfg.OnDemand.hostAllowlist = make(map[string]struct{}) } for _, domainName := range domainNames { domainName = normalizedName(domainName) // if on-demand is configured, defer obtain and renew operations if cfg.OnDemand != nil { cfg.OnDemand.hostAllowlist[domainName] = struct{}{} continue } // TODO: consider doing this in a goroutine if async, to utilize multiple cores while loading certs // otherwise, begin management immediately err := cfg.manageOne(ctx, domainName, async) if err != nil { return err } } return nil } func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool) error { // if certificate is already being managed, nothing to do; maintenance will continue certs := cfg.certCache.getAllMatchingCerts(domainName) for _, cert := range certs { if cert.managed { return nil } } // first try loading existing certificate from storage cert, err := cfg.CacheManagedCertificate(ctx, domainName) if err != nil { if !errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("%s: caching certificate: %v", domainName, err) } // if we don't have one in storage, obtain one obtain := func() error { var err error if async { err = cfg.ObtainCertAsync(ctx, domainName) } else { err = cfg.ObtainCertSync(ctx, domainName) } if err != nil { return fmt.Errorf("%s: obtaining certificate: %w", domainName, err) } cert, err = cfg.CacheManagedCertificate(ctx, domainName) if err != nil { return fmt.Errorf("%s: caching certificate after obtaining it: %v", domainName, err) } return nil } if async { // Leave the job name empty so as to allow duplicate 'obtain' // jobs; this is because Caddy calls ManageAsync() before the // previous config is stopped (and before its context is // canceled), which means that if an obtain job is still // running for the same domain, Submit() would not queue the // new one because it is still running, even though it is // (probably) about to be canceled (it might not if the new // config fails to finish loading, however). In any case, we // presume it is safe to enqueue a duplicate obtain job because // either the old one (or sometimes the new one) is about to be // canceled. This seems like reasonable logic for any consumer // of this lib. See https://github.com/caddyserver/caddy/issues/3202 jm.Submit(cfg.Logger, "", obtain) return nil } return obtain() } // for an existing certificate, make sure it is renewed; or if it is revoked, // force a renewal even if it's not expiring renew := func() error { // first, ensure status is not revoked (it was just refreshed in CacheManagedCertificate above) if !cert.Expired() && cert.ocsp != nil && cert.ocsp.Status == ocsp.Revoked { _, err = cfg.forceRenew(ctx, cfg.Logger, cert) return err } // ensure ARI is updated before we check whether the cert needs renewing // (we ignore the second return value because we already check if needs renewing anyway) if !cfg.DisableARI && cert.ari.NeedsRefresh() { cert, _, err = cfg.updateARI(ctx, cert, cfg.Logger) if err != nil { cfg.Logger.Error("updating ARI upon managing", zap.Error(err)) } } // otherwise, simply renew the certificate if needed if cert.NeedsRenewal(cfg) { var err error if async { err = cfg.RenewCertAsync(ctx, domainName, false) } else { err = cfg.RenewCertSync(ctx, domainName, false) } if err != nil { return fmt.Errorf("%s: renewing certificate: %w", domainName, err) } // successful renewal, so update in-memory cache _, err = cfg.reloadManagedCertificate(ctx, cert) if err != nil { return fmt.Errorf("%s: reloading renewed certificate into memory: %v", domainName, err) } } return nil } if async { jm.Submit(cfg.Logger, "renew_"+domainName, renew) return nil } return renew() } // ObtainCertSync generates a new private key and obtains a certificate for // name using cfg in the foreground; i.e. interactively and without retries. // It stows the renewed certificate and its assets in storage if successful. // It DOES NOT load the certificate into the in-memory cache. This method // is a no-op if storage already has a certificate for name. func (cfg *Config) ObtainCertSync(ctx context.Context, name string) error { return cfg.obtainCert(ctx, name, true) } // ObtainCertAsync is the same as ObtainCertSync(), except it runs in the // background; i.e. non-interactively, and with retries if it fails. func (cfg *Config) ObtainCertAsync(ctx context.Context, name string) error { return cfg.obtainCert(ctx, name, false) } func (cfg *Config) obtainCert(ctx context.Context, name string, interactive bool) error { if len(cfg.Issuers) == 0 { return fmt.Errorf("no issuers configured; impossible to obtain or check for existing certificate in storage") } log := cfg.Logger.Named("obtain") name = cfg.transformSubject(ctx, log, name) // if storage has all resources for this certificate, obtain is a no-op if cfg.storageHasCertResourcesAnyIssuer(ctx, name) { return nil } // ensure storage is writeable and readable // TODO: this is not necessary every time; should only perform check once every so often for each storage, which may require some global state... err := cfg.checkStorage(ctx) if err != nil { return fmt.Errorf("failed storage check: %v - storage is probably misconfigured", err) } log.Info("acquiring lock", zap.String("identifier", name)) // ensure idempotency of the obtain operation for this name lockKey := cfg.lockKey(certIssueLockOp, name) err = acquireLock(ctx, cfg.Storage, lockKey) if err != nil { return fmt.Errorf("unable to acquire lock '%s': %v", lockKey, err) } defer func() { log.Info("releasing lock", zap.String("identifier", name)) if err := releaseLock(ctx, cfg.Storage, lockKey); err != nil { log.Error("unable to unlock", zap.String("identifier", name), zap.String("lock_key", lockKey), zap.Error(err)) } }() log.Info("lock acquired", zap.String("identifier", name)) f := func(ctx context.Context) error { // check if obtain is still needed -- might have been obtained during lock if cfg.storageHasCertResourcesAnyIssuer(ctx, name) { log.Info("certificate already exists in storage", zap.String("identifier", name)) return nil } log.Info("obtaining certificate", zap.String("identifier", name)) if err := cfg.emit(ctx, "cert_obtaining", map[string]any{"identifier": name}); err != nil { return fmt.Errorf("obtaining certificate aborted by event handler: %w", err) } // If storage has a private key already, use it; otherwise we'll generate our own. // Also create the slice of issuers we will try using according to any issuer // selection policy (it must be a copy of the slice so we don't mutate original). var privKey crypto.PrivateKey var privKeyPEM []byte var issuers []Issuer if cfg.ReusePrivateKeys { privKey, privKeyPEM, issuers, err = cfg.reusePrivateKey(ctx, name) if err != nil { return err } } else { issuers = make([]Issuer, len(cfg.Issuers)) copy(issuers, cfg.Issuers) } if cfg.IssuerPolicy == UseFirstRandomIssuer { weakrand.Shuffle(len(issuers), func(i, j int) { issuers[i], issuers[j] = issuers[j], issuers[i] }) } if privKey == nil { privKey, err = cfg.KeySource.GenerateKey() if err != nil { return err } privKeyPEM, err = PEMEncodePrivateKey(privKey) if err != nil { return err } } csr, err := cfg.generateCSR(privKey, []string{name}, false) if err != nil { return err } // try to obtain from each issuer until we succeed var issuedCert *IssuedCertificate var issuerUsed Issuer var issuerKeys []string for i, issuer := range issuers { issuerKeys = append(issuerKeys, issuer.IssuerKey()) log.Debug(fmt.Sprintf("trying issuer %d/%d", i+1, len(cfg.Issuers)), zap.String("issuer", issuer.IssuerKey())) if prechecker, ok := issuer.(PreChecker); ok { err = prechecker.PreCheck(ctx, []string{name}, interactive) if err != nil { continue } } // TODO: ZeroSSL's API currently requires CommonName to be set, and requires it be // distinct from SANs. If this was a cert it would violate the BRs, but their certs // are compliant, so their CSR requirements just needlessly add friction, complexity, // and inefficiency for clients. CommonName has been deprecated for 25+ years. useCSR := csr if issuer.IssuerKey() == zerosslIssuerKey { useCSR, err = cfg.generateCSR(privKey, []string{name}, true) if err != nil { return err } } issuedCert, err = issuer.Issue(ctx, useCSR) if err == nil { issuerUsed = issuer break } // err is usually wrapped, which is nice for simply printing it, but // with our structured error logs we only need the problem string errToLog := err var problem acme.Problem if errors.As(err, &problem) { errToLog = problem } log.Error("could not get certificate from issuer", zap.String("identifier", name), zap.String("issuer", issuer.IssuerKey()), zap.Error(errToLog)) } if err != nil { cfg.emit(ctx, "cert_failed", map[string]any{ "renewal": false, "identifier": name, "issuers": issuerKeys, "error": err, }) // only the error from the last issuer will be returned, but we logged the others return fmt.Errorf("[%s] Obtain: %w", name, err) } issuerKey := issuerUsed.IssuerKey() // success - immediately save the certificate resource metaJSON, err := json.Marshal(issuedCert.Metadata) if err != nil { log.Error("unable to encode certificate metadata", zap.Error(err)) } certRes := CertificateResource{ SANs: namesFromCSR(csr), CertificatePEM: issuedCert.Certificate, PrivateKeyPEM: privKeyPEM, IssuerData: metaJSON, issuerKey: issuerUsed.IssuerKey(), } err = cfg.saveCertResource(ctx, issuerUsed, certRes) if err != nil { return fmt.Errorf("[%s] Obtain: saving assets: %v", name, err) } log.Info("certificate obtained successfully", zap.String("identifier", name), zap.String("issuer", issuerUsed.IssuerKey())) certKey := certRes.NamesKey() cfg.emit(ctx, "cert_obtained", map[string]any{ "renewal": false, "identifier": name, "issuer": issuerUsed.IssuerKey(), "storage_path": StorageKeys.CertsSitePrefix(issuerKey, certKey), "private_key_path": StorageKeys.SitePrivateKey(issuerKey, certKey), "certificate_path": StorageKeys.SiteCert(issuerKey, certKey), "metadata_path": StorageKeys.SiteMeta(issuerKey, certKey), "csr_pem": pem.EncodeToMemory(&pem.Block{ Type: "CERTIFICATE REQUEST", Bytes: csr.Raw, }), }) return nil } if interactive { err = f(ctx) } else { err = doWithRetry(ctx, log, f) } return err } // reusePrivateKey looks for a private key for domain in storage in the configured issuers // paths. For the first private key it finds, it returns that key both decoded and PEM-encoded, // as well as the reordered list of issuers to use instead of cfg.Issuers (because if a key // is found, that issuer should be tried first, so it is moved to the front in a copy of // cfg.Issuers). func (cfg *Config) reusePrivateKey(ctx context.Context, domain string) (privKey crypto.PrivateKey, privKeyPEM []byte, issuers []Issuer, err error) { // make a copy of cfg.Issuers so that if we have to reorder elements, we don't // inadvertently mutate the configured issuers (see append calls below) issuers = make([]Issuer, len(cfg.Issuers)) copy(issuers, cfg.Issuers) for i, issuer := range issuers { // see if this issuer location in storage has a private key for the domain privateKeyStorageKey := StorageKeys.SitePrivateKey(issuer.IssuerKey(), domain) privKeyPEM, err = cfg.Storage.Load(ctx, privateKeyStorageKey) if errors.Is(err, fs.ErrNotExist) { err = nil // obviously, it's OK to not have a private key; so don't prevent obtaining a cert continue } if err != nil { return nil, nil, nil, fmt.Errorf("loading existing private key for reuse with issuer %s: %v", issuer.IssuerKey(), err) } // we loaded a private key; try decoding it so we can use it privKey, err = PEMDecodePrivateKey(privKeyPEM) if err != nil { return nil, nil, nil, err } // since the private key was found in storage for this issuer, move it // to the front of the list so we prefer this issuer first issuers = append([]Issuer{issuer}, append(issuers[:i], issuers[i+1:]...)...) break } return } // storageHasCertResourcesAnyIssuer returns true if storage has all the // certificate resources in storage from any configured issuer. It checks // all configured issuers in order. func (cfg *Config) storageHasCertResourcesAnyIssuer(ctx context.Context, name string) bool { for _, iss := range cfg.Issuers { if cfg.storageHasCertResources(ctx, iss, name) { return true } } return false } // RenewCertSync renews the certificate for name using cfg in the foreground; // i.e. interactively and without retries. It stows the renewed certificate // and its assets in storage if successful. It DOES NOT update the in-memory // cache with the new certificate. The certificate will not be renewed if it // is not close to expiring unless force is true. func (cfg *Config) RenewCertSync(ctx context.Context, name string, force bool) error { return cfg.renewCert(ctx, name, force, true) } // RenewCertAsync is the same as RenewCertSync(), except it runs in the // background; i.e. non-interactively, and with retries if it fails. func (cfg *Config) RenewCertAsync(ctx context.Context, name string, force bool) error { return cfg.renewCert(ctx, name, force, false) } func (cfg *Config) renewCert(ctx context.Context, name string, force, interactive bool) error { if len(cfg.Issuers) == 0 { return fmt.Errorf("no issuers configured; impossible to renew or check existing certificate in storage") } log := cfg.Logger.Named("renew") name = cfg.transformSubject(ctx, log, name) // ensure storage is writeable and readable // TODO: this is not necessary every time; should only perform check once every so often for each storage, which may require some global state... err := cfg.checkStorage(ctx) if err != nil { return fmt.Errorf("failed storage check: %v - storage is probably misconfigured", err) } log.Info("acquiring lock", zap.String("identifier", name)) // ensure idempotency of the renew operation for this name lockKey := cfg.lockKey(certIssueLockOp, name) err = acquireLock(ctx, cfg.Storage, lockKey) if err != nil { return fmt.Errorf("unable to acquire lock '%s': %v", lockKey, err) } defer func() { log.Info("releasing lock", zap.String("identifier", name)) if err := releaseLock(ctx, cfg.Storage, lockKey); err != nil { log.Error("unable to unlock", zap.String("identifier", name), zap.String("lock_key", lockKey), zap.Error(err)) } }() log.Info("lock acquired", zap.String("identifier", name)) f := func(ctx context.Context) error { // prepare for renewal (load PEM cert, key, and meta) certRes, err := cfg.loadCertResourceAnyIssuer(ctx, name) if err != nil { return err } // check if renew is still needed - might have been renewed while waiting for lock timeLeft, leaf, needsRenew := cfg.managedCertNeedsRenewal(certRes, false) if !needsRenew { if force { log.Info("certificate does not need to be renewed, but renewal is being forced", zap.String("identifier", name), zap.Duration("remaining", timeLeft)) } else { log.Info("certificate appears to have been renewed already", zap.String("identifier", name), zap.Duration("remaining", timeLeft)) return nil } } log.Info("renewing certificate", zap.String("identifier", name), zap.Duration("remaining", timeLeft)) if err := cfg.emit(ctx, "cert_obtaining", map[string]any{ "renewal": true, "identifier": name, "forced": force, "remaining": timeLeft, "issuer": certRes.issuerKey, // previous/current issuer }); err != nil { return fmt.Errorf("renewing certificate aborted by event handler: %w", err) } // reuse or generate new private key for CSR var privateKey crypto.PrivateKey if cfg.ReusePrivateKeys { privateKey, err = PEMDecodePrivateKey(certRes.PrivateKeyPEM) } else { privateKey, err = cfg.KeySource.GenerateKey() } if err != nil { return err } // if we generated a new key, make sure to replace its PEM encoding too! if !cfg.ReusePrivateKeys { certRes.PrivateKeyPEM, err = PEMEncodePrivateKey(privateKey) if err != nil { return err } } csr, err := cfg.generateCSR(privateKey, []string{name}, false) if err != nil { return err } // try to obtain from each issuer until we succeed var issuedCert *IssuedCertificate var issuerUsed Issuer var issuerKeys []string for _, issuer := range cfg.Issuers { // TODO: ZeroSSL's API currently requires CommonName to be set, and requires it be // distinct from SANs. If this was a cert it would violate the BRs, but their certs // are compliant, so their CSR requirements just needlessly add friction, complexity, // and inefficiency for clients. CommonName has been deprecated for 25+ years. useCSR := csr if _, ok := issuer.(*ZeroSSLIssuer); ok { useCSR, err = cfg.generateCSR(privateKey, []string{name}, true) if err != nil { return err } } issuerKeys = append(issuerKeys, issuer.IssuerKey()) if prechecker, ok := issuer.(PreChecker); ok { err = prechecker.PreCheck(ctx, []string{name}, interactive) if err != nil { continue } } // if we're renewing with the same ACME CA as before, have the ACME // client tell the server we are replacing a certificate (but doing // this on the wrong CA, or when the CA doesn't recognize the certID, // can fail the order) -- TODO: change this check to whether we're using the same ACME account, not CA if !cfg.DisableARI { if acmeData, err := certRes.getACMEData(); err == nil && acmeData.CA != "" { if acmeIss, ok := issuer.(*ACMEIssuer); ok { if acmeIss.CA == acmeData.CA { ctx = context.WithValue(ctx, ctxKeyARIReplaces, leaf) } } } } issuedCert, err = issuer.Issue(ctx, useCSR) if err == nil { issuerUsed = issuer break } // err is usually wrapped, which is nice for simply printing it, but // with our structured error logs we only need the problem string errToLog := err var problem acme.Problem if errors.As(err, &problem) { errToLog = problem } log.Error("could not get certificate from issuer", zap.String("identifier", name), zap.String("issuer", issuer.IssuerKey()), zap.Error(errToLog)) } if err != nil { cfg.emit(ctx, "cert_failed", map[string]any{ "renewal": true, "identifier": name, "remaining": timeLeft, "issuers": issuerKeys, "error": err, }) // only the error from the last issuer will be returned, but we logged the others return fmt.Errorf("[%s] Renew: %w", name, err) } issuerKey := issuerUsed.IssuerKey() // success - immediately save the renewed certificate resource metaJSON, err := json.Marshal(issuedCert.Metadata) if err != nil { log.Error("unable to encode certificate metadata", zap.Error(err)) } newCertRes := CertificateResource{ SANs: namesFromCSR(csr), CertificatePEM: issuedCert.Certificate, PrivateKeyPEM: certRes.PrivateKeyPEM, IssuerData: metaJSON, issuerKey: issuerKey, } err = cfg.saveCertResource(ctx, issuerUsed, newCertRes) if err != nil { return fmt.Errorf("[%s] Renew: saving assets: %v", name, err) } log.Info("certificate renewed successfully", zap.String("identifier", name), zap.String("issuer", issuerKey)) certKey := newCertRes.NamesKey() cfg.emit(ctx, "cert_obtained", map[string]any{ "renewal": true, "remaining": timeLeft, "identifier": name, "issuer": issuerKey, "storage_path": StorageKeys.CertsSitePrefix(issuerKey, certKey), "private_key_path": StorageKeys.SitePrivateKey(issuerKey, certKey), "certificate_path": StorageKeys.SiteCert(issuerKey, certKey), "metadata_path": StorageKeys.SiteMeta(issuerKey, certKey), "csr_pem": pem.EncodeToMemory(&pem.Block{ Type: "CERTIFICATE REQUEST", Bytes: csr.Raw, }), }) return nil } if interactive { err = f(ctx) } else { err = doWithRetry(ctx, log, f) } return err } // generateCSR generates a CSR for the given SANs. If useCN is true, CommonName will get the first SAN (TODO: this is only a temporary hack for ZeroSSL API support). func (cfg *Config) generateCSR(privateKey crypto.PrivateKey, sans []string, useCN bool) (*x509.CertificateRequest, error) { csrTemplate := new(x509.CertificateRequest) for _, name := range sans { // identifiers should be converted to punycode before going into the CSR // (convert IDNs to ASCII according to RFC 5280 section 7) normalizedName, err := idna.ToASCII(name) if err != nil { return nil, fmt.Errorf("converting identifier '%s' to ASCII: %v", name, err) } // TODO: This is a temporary hack to support ZeroSSL API... if useCN && csrTemplate.Subject.CommonName == "" && len(normalizedName) <= 64 { csrTemplate.Subject.CommonName = normalizedName continue } if ip := net.ParseIP(normalizedName); ip != nil { csrTemplate.IPAddresses = append(csrTemplate.IPAddresses, ip) } else if strings.Contains(normalizedName, "@") { csrTemplate.EmailAddresses = append(csrTemplate.EmailAddresses, normalizedName) } else if u, err := url.Parse(normalizedName); err == nil && strings.Contains(normalizedName, "/") { csrTemplate.URIs = append(csrTemplate.URIs, u) } else { csrTemplate.DNSNames = append(csrTemplate.DNSNames, normalizedName) } } if cfg.MustStaple { csrTemplate.ExtraExtensions = append(csrTemplate.ExtraExtensions, mustStapleExtension) } // IP addresses aren't printed here because I'm too lazy to marshal them as strings, but // we at least print the incoming SANs so it should be obvious what became IPs cfg.Logger.Debug("created CSR", zap.Strings("identifiers", sans), zap.Strings("san_dns_names", csrTemplate.DNSNames), zap.Strings("san_emails", csrTemplate.EmailAddresses), zap.String("common_name", csrTemplate.Subject.CommonName), zap.Int("extra_extensions", len(csrTemplate.ExtraExtensions)), ) csrDER, err := x509.CreateCertificateRequest(rand.Reader, csrTemplate, privateKey) if err != nil { return nil, err } return x509.ParseCertificateRequest(csrDER) } // RevokeCert revokes the certificate for domain via ACME protocol. It requires // that cfg.Issuers is properly configured with the same issuer that issued the // certificate being revoked. See RFC 5280 ยง5.3.1 for reason codes. // // The certificate assets are deleted from storage after successful revocation // to prevent reuse. func (cfg *Config) RevokeCert(ctx context.Context, domain string, reason int, interactive bool) error { for i, issuer := range cfg.Issuers { issuerKey := issuer.IssuerKey() rev, ok := issuer.(Revoker) if !ok { return fmt.Errorf("issuer %d (%s) is not a Revoker", i, issuerKey) } certRes, err := cfg.loadCertResource(ctx, issuer, domain) if err != nil { return err } if !cfg.Storage.Exists(ctx, StorageKeys.SitePrivateKey(issuerKey, domain)) { return fmt.Errorf("private key not found for %s", certRes.SANs) } err = rev.Revoke(ctx, certRes, reason) if err != nil { return fmt.Errorf("issuer %d (%s): %v", i, issuerKey, err) } err = cfg.deleteSiteAssets(ctx, issuerKey, domain) if err != nil { return fmt.Errorf("certificate revoked, but unable to fully clean up assets from issuer %s: %v", issuerKey, err) } } return nil } // TLSConfig is an opinionated method that returns a recommended, modern // TLS configuration that can be used to configure TLS listeners. Aside // from safe, modern defaults, this method sets two critical fields on the // TLS config which are required to enable automatic certificate // management: GetCertificate and NextProtos. // // The GetCertificate field is necessary to get certificates from memory // or storage, including both manual and automated certificates. You // should only change this field if you know what you are doing. // // The NextProtos field is pre-populated with a special value to enable // solving the TLS-ALPN ACME challenge. Because this method does not // assume any particular protocols after the TLS handshake is completed, // you will likely need to customize the NextProtos field by prepending // your application's protocols to the slice. For example, to serve // HTTP, you will need to prepend "h2" and "http/1.1" values. Be sure to // leave the acmez.ACMETLS1Protocol value intact, however, or TLS-ALPN // challenges will fail (which may be acceptable if you are not using // ACME, or specifically, the TLS-ALPN challenge). // // Unlike the package TLS() function, this method does not, by itself, // enable certificate management for any domain names. func (cfg *Config) TLSConfig() *tls.Config { return &tls.Config{ // these two fields necessary for TLS-ALPN challenge GetCertificate: cfg.GetCertificate, NextProtos: []string{acmez.ACMETLS1Protocol}, // the rest recommended for modern TLS servers MinVersion: tls.VersionTLS12, CurvePreferences: []tls.CurveID{ tls.X25519, tls.CurveP256, }, CipherSuites: preferredDefaultCipherSuites(), PreferServerCipherSuites: true, } } // getChallengeInfo loads the challenge info from either the internal challenge memory // or the external storage (implying distributed solving). The second return value // indicates whether challenge info was loaded from external storage. If true, the // challenge is being solved in a distributed fashion; if false, from internal memory. // If no matching challenge information can be found, an error is returned. func (cfg *Config) getChallengeInfo(ctx context.Context, identifier string) (Challenge, bool, error) { // first, check if our process initiated this challenge; if so, just return it chalData, ok := GetACMEChallenge(identifier) if ok { return chalData, false, nil } // otherwise, perhaps another instance in the cluster initiated it; check // the configured storage to retrieve challenge data var chalInfo acme.Challenge var chalInfoBytes []byte var tokenKey string for _, issuer := range cfg.Issuers { ds := distributedSolver{ storage: cfg.Storage, storageKeyIssuerPrefix: storageKeyACMECAPrefix(issuer.IssuerKey()), } tokenKey = ds.challengeTokensKey(identifier) var err error chalInfoBytes, err = cfg.Storage.Load(ctx, tokenKey) if err == nil { break } if errors.Is(err, fs.ErrNotExist) { continue } return Challenge{}, false, fmt.Errorf("opening distributed challenge token file %s: %v", tokenKey, err) } if len(chalInfoBytes) == 0 { return Challenge{}, false, fmt.Errorf("no information found to solve challenge for identifier: %s", identifier) } err := json.Unmarshal(chalInfoBytes, &chalInfo) if err != nil { return Challenge{}, false, fmt.Errorf("decoding challenge token file %s (corrupted?): %v", tokenKey, err) } return Challenge{Challenge: chalInfo}, true, nil } func (cfg *Config) transformSubject(ctx context.Context, logger *zap.Logger, name string) string { if cfg.SubjectTransformer == nil { return name } transformedName := cfg.SubjectTransformer(ctx, name) if logger != nil && transformedName != name { logger.Debug("transformed subject name", zap.String("original", name), zap.String("transformed", transformedName)) } return transformedName } // checkStorage tests the storage by writing random bytes // to a random key, and then loading those bytes and // comparing the loaded value. If this fails, the provided // cfg.Storage mechanism should not be used. func (cfg *Config) checkStorage(ctx context.Context) error { if cfg.DisableStorageCheck { return nil } key := fmt.Sprintf("rw_test_%d", weakrand.Int()) contents := make([]byte, 1024*10) // size sufficient for one or two ACME resources _, err := weakrand.Read(contents) if err != nil { return err } err = cfg.Storage.Store(ctx, key, contents) if err != nil { return err } defer func() { deleteErr := cfg.Storage.Delete(ctx, key) if deleteErr != nil { cfg.Logger.Error("deleting test key from storage", zap.String("key", key), zap.Error(err)) } // if there was no other error, make sure // to return any error returned from Delete if err == nil { err = deleteErr } }() loaded, err := cfg.Storage.Load(ctx, key) if err != nil { return err } if !bytes.Equal(contents, loaded) { return fmt.Errorf("load yielded different value than was stored; expected %d bytes, got %d bytes of differing elements", len(contents), len(loaded)) } return nil } // storageHasCertResources returns true if the storage // associated with cfg's certificate cache has all the // resources related to the certificate for domain: the // certificate, the private key, and the metadata. func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool { issuerKey := issuer.IssuerKey() certKey := StorageKeys.SiteCert(issuerKey, domain) keyKey := StorageKeys.SitePrivateKey(issuerKey, domain) metaKey := StorageKeys.SiteMeta(issuerKey, domain) return cfg.Storage.Exists(ctx, certKey) && cfg.Storage.Exists(ctx, keyKey) && cfg.Storage.Exists(ctx, metaKey) } // deleteSiteAssets deletes the folder in storage containing the // certificate, private key, and metadata file for domain from the // issuer with the given issuer key. func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain string) error { err := cfg.Storage.Delete(ctx, StorageKeys.SiteCert(issuerKey, domain)) if err != nil { return fmt.Errorf("deleting certificate file: %v", err) } err = cfg.Storage.Delete(ctx, StorageKeys.SitePrivateKey(issuerKey, domain)) if err != nil { return fmt.Errorf("deleting private key: %v", err) } err = cfg.Storage.Delete(ctx, StorageKeys.SiteMeta(issuerKey, domain)) if err != nil { return fmt.Errorf("deleting metadata file: %v", err) } err = cfg.Storage.Delete(ctx, StorageKeys.CertsSitePrefix(issuerKey, domain)) if err != nil { return fmt.Errorf("deleting site asset folder: %v", err) } return nil } // lockKey returns a key for a lock that is specific to the operation // named op being performed related to domainName and this config's CA. func (cfg *Config) lockKey(op, domainName string) string { return fmt.Sprintf("%s_%s", op, domainName) } // managedCertNeedsRenewal returns true if certRes is expiring soon or already expired, // or if the process of decoding the cert and checking its expiration returned an error. // If there wasn't an error, the leaf cert is also returned, so it can be reused if // necessary, since we are parsing the PEM bundle anyway. func (cfg *Config) managedCertNeedsRenewal(certRes CertificateResource, emitLogs bool) (time.Duration, *x509.Certificate, bool) { certChain, err := parseCertsFromPEMBundle(certRes.CertificatePEM) if err != nil || len(certChain) == 0 { return 0, nil, true } var ari acme.RenewalInfo if !cfg.DisableARI { if ariPtr, err := certRes.getARI(); err == nil && ariPtr != nil { ari = *ariPtr } } remaining := time.Until(expiresAt(certChain[0])) return remaining, certChain[0], cfg.certNeedsRenewal(certChain[0], ari, emitLogs) } func (cfg *Config) emit(ctx context.Context, eventName string, data map[string]any) error { if cfg.OnEvent == nil { return nil } return cfg.OnEvent(ctx, eventName, data) } // CertificateSelector is a type which can select a certificate to use given multiple choices. type CertificateSelector interface { SelectCertificate(*tls.ClientHelloInfo, []Certificate) (Certificate, error) } // OCSPConfig configures how OCSP is handled. type OCSPConfig struct { // Disable automatic OCSP stapling; strongly // discouraged unless you have a good reason. // Disabling this puts clients at greater risk // and reduces their privacy. DisableStapling bool // A map of OCSP responder domains to replacement // domains for querying OCSP servers. Used for // overriding the OCSP responder URL that is // embedded in certificates. Mapping to an empty // URL will disable OCSP from that responder. ResponderOverrides map[string]string // Optionally specify a function that can return the URL // for an HTTP proxy to use for OCSP-related HTTP requests. HTTPProxy func(*http.Request) (*url.URL, error) } // certIssueLockOp is the name of the operation used // when naming a lock to make it mutually exclusive // with other certificate issuance operations for a // certain name. const certIssueLockOp = "issue_cert" // Constants for PKIX MustStaple extension. var ( tlsFeatureExtensionOID = asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 1, 24} ocspMustStapleFeature = []byte{0x30, 0x03, 0x02, 0x01, 0x05} mustStapleExtension = pkix.Extension{ Id: tlsFeatureExtensionOID, Value: ocspMustStapleFeature, } )