Initial implementation of ARI (#286)

* Initial implementation of ARI

* Enhance redundancy, robustness, and logging

* Improve ARI updating; integrate on-demand TLS; detect changed window
This commit is contained in:
Matt Holt 2024-05-07 09:46:03 -06:00 committed by GitHub
parent fa7161a1a8
commit 0e88b3eaa1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 528 additions and 100 deletions

View File

@ -137,44 +137,21 @@ func (iss *ACMEIssuer) newACMEClientWithAccount(ctx context.Context, useTestCA,
// independent of any particular ACME account. If useTestCA is true, am.TestCA // independent of any particular ACME account. If useTestCA is true, am.TestCA
// will be used if it is set; otherwise, the primary CA will be used. // will be used if it is set; otherwise, the primary CA will be used.
func (iss *ACMEIssuer) newACMEClient(useTestCA bool) (*acmez.Client, error) { func (iss *ACMEIssuer) newACMEClient(useTestCA bool) (*acmez.Client, error) {
// ensure defaults are filled in client, err := iss.newBasicACMEClient()
var caURL string if err != nil {
if useTestCA { return nil, err
caURL = iss.TestCA
} }
if caURL == "" {
caURL = iss.CA // fill in a little more beyond a basic client
} if useTestCA && iss.TestCA != "" {
if caURL == "" { client.Client.Directory = iss.TestCA
caURL = DefaultACME.CA
} }
certObtainTimeout := iss.CertObtainTimeout certObtainTimeout := iss.CertObtainTimeout
if certObtainTimeout == 0 { if certObtainTimeout == 0 {
certObtainTimeout = DefaultACME.CertObtainTimeout certObtainTimeout = DefaultACME.CertObtainTimeout
} }
client.Client.PollTimeout = certObtainTimeout
// ensure endpoint is secure (assume HTTPS if scheme is missing) client.ChallengeSolvers = make(map[string]acmez.Solver)
if !strings.Contains(caURL, "://") {
caURL = "https://" + caURL
}
u, err := url.Parse(caURL)
if err != nil {
return nil, err
}
if u.Scheme != "https" && !SubjectIsInternal(u.Host) {
return nil, fmt.Errorf("%s: insecure CA URL (HTTPS required for non-internal CA)", caURL)
}
client := &acmez.Client{
Client: &acme.Client{
Directory: caURL,
PollTimeout: certObtainTimeout,
UserAgent: buildUAString(),
HTTPClient: iss.httpClient,
},
ChallengeSolvers: make(map[string]acmez.Solver),
}
client.Logger = iss.Logger.Named("acme_client")
// configure challenges (most of the time, DNS challenge is // configure challenges (most of the time, DNS challenge is
// exclusive of other ones because it is usually only used // exclusive of other ones because it is usually only used
@ -230,6 +207,42 @@ func (iss *ACMEIssuer) newACMEClient(useTestCA bool) (*acmez.Client, error) {
return client, nil return client, nil
} }
// newBasicACMEClient sets up a basically-functional ACME client that is not capable
// of solving challenges but can provide basic interactions with the server.
func (iss *ACMEIssuer) newBasicACMEClient() (*acmez.Client, error) {
caURL := iss.CA
if caURL == "" {
caURL = DefaultACME.CA
}
// ensure endpoint is secure (assume HTTPS if scheme is missing)
if !strings.Contains(caURL, "://") {
caURL = "https://" + caURL
}
u, err := url.Parse(caURL)
if err != nil {
return nil, err
}
if u.Scheme != "https" && !SubjectIsInternal(u.Host) {
return nil, fmt.Errorf("%s: insecure CA URL (HTTPS required for non-internal CA)", caURL)
}
return &acmez.Client{
Client: &acme.Client{
Directory: caURL,
UserAgent: buildUAString(),
HTTPClient: iss.httpClient,
Logger: iss.Logger.Named("acme_client"),
},
}, nil
}
func (iss *ACMEIssuer) getRenewalInfo(ctx context.Context, cert Certificate) (acme.RenewalInfo, error) {
acmeClient, err := iss.newBasicACMEClient()
if err != nil {
return acme.RenewalInfo{}, err
}
return acmeClient.GetRenewalInfo(ctx, cert.Certificate.Leaf)
}
func (iss *ACMEIssuer) getHTTPPort() int { func (iss *ACMEIssuer) getHTTPPort() int {
useHTTPPort := HTTPChallengePort useHTTPPort := HTTPChallengePort
if HTTPPort > 0 && HTTPPort != HTTPChallengePort { if HTTPPort > 0 && HTTPPort != HTTPChallengePort {

View File

@ -362,12 +362,13 @@ func (am *ACMEIssuer) Issue(ctx context.Context, csr *x509.CertificateRequest) (
panic("missing config pointer (must use NewACMEIssuer)") panic("missing config pointer (must use NewACMEIssuer)")
} }
var isRetry bool var attempts int
if attempts, ok := ctx.Value(AttemptsCtxKey).(*int); ok { if attemptsPtr, ok := ctx.Value(AttemptsCtxKey).(*int); ok {
isRetry = *attempts > 0 attempts = *attemptsPtr
} }
isRetry := attempts > 0
cert, usedTestCA, err := am.doIssue(ctx, csr, isRetry) cert, usedTestCA, err := am.doIssue(ctx, csr, attempts)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -395,7 +396,7 @@ func (am *ACMEIssuer) Issue(ctx context.Context, csr *x509.CertificateRequest) (
// other endpoint. This is more likely to happen if a user is testing with // other endpoint. This is more likely to happen if a user is testing with
// the staging CA as the main CA, then changes their configuration once they // the staging CA as the main CA, then changes their configuration once they
// think they are ready for the production endpoint. // think they are ready for the production endpoint.
cert, _, err = am.doIssue(ctx, csr, false) cert, _, err = am.doIssue(ctx, csr, 0)
if err != nil { if err != nil {
// succeeded with test CA but failed just now with the production CA; // succeeded with test CA but failed just now with the production CA;
// either we are observing differing internal states of each CA that will // either we are observing differing internal states of each CA that will
@ -423,7 +424,8 @@ func (am *ACMEIssuer) Issue(ctx context.Context, csr *x509.CertificateRequest) (
return cert, err return cert, err
} }
func (am *ACMEIssuer) doIssue(ctx context.Context, csr *x509.CertificateRequest, useTestCA bool) (*IssuedCertificate, bool, error) { func (am *ACMEIssuer) doIssue(ctx context.Context, csr *x509.CertificateRequest, attempts int) (*IssuedCertificate, bool, error) {
useTestCA := attempts > 0
client, err := am.newACMEClientWithAccount(ctx, useTestCA, false) client, err := am.newACMEClientWithAccount(ctx, useTestCA, false)
if err != nil { if err != nil {
return nil, false, err return nil, false, err
@ -449,6 +451,22 @@ func (am *ACMEIssuer) doIssue(ctx context.Context, csr *x509.CertificateRequest,
params.NotAfter = time.Now().Add(am.NotAfter) params.NotAfter = time.Now().Add(am.NotAfter)
} }
// Notify the ACME server we are replacing a certificate (if the caller says we are),
// only if the following conditions are met:
// - The caller has set a Replaces value in the context, indicating this is a renewal.
// - Not using test CA. This should be obvious, but a test CA should be in a separate
// environment from production, and thus not have knowledge of the cert being replaced.
// - Not a certain attempt number. We skip setting Replaces once early on in the retries
// in case the reason the order is failing is only because there is a state inconsistency
// between client and server or some sort of bookkeeping error with regards to the certID
// and the server is rejecting the ARI certID. In any case, an invalid certID may cause
// orders to fail. So try once without setting it.
if !usingTestCA && attempts != 2 {
if replacing, ok := ctx.Value(ctxKeyARIReplaces).(*x509.Certificate); ok {
params.Replaces = replacing
}
}
// do this in a loop because there's an error case that may necessitate a retry, but not more than once // do this in a loop because there's an error case that may necessitate a retry, but not more than once
var certChains []acme.Certificate var certChains []acme.Certificate
for i := 0; i < 2; i++ { for i := 0; i < 2; i++ {
@ -631,6 +649,10 @@ const (
// prefixACME is the storage key prefix used for ACME-specific assets. // prefixACME is the storage key prefix used for ACME-specific assets.
const prefixACME = "acme" const prefixACME = "acme"
type ctxKey string
const ctxKeyARIReplaces = ctxKey("ari_replaces")
// Interface guards // Interface guards
var ( var (
_ PreChecker = (*ACMEIssuer)(nil) _ PreChecker = (*ACMEIssuer)(nil)

View File

@ -18,12 +18,15 @@ import (
"context" "context"
"crypto/tls" "crypto/tls"
"crypto/x509" "crypto/x509"
"encoding/json"
"fmt" "fmt"
"math/rand"
"net" "net"
"os" "os"
"strings" "strings"
"time" "time"
"github.com/mholt/acmez/v2/acme"
"go.uber.org/zap" "go.uber.org/zap"
"golang.org/x/crypto/ocsp" "golang.org/x/crypto/ocsp"
) )
@ -56,6 +59,9 @@ type Certificate struct {
// The unique string identifying the issuer of this certificate. // The unique string identifying the issuer of this certificate.
issuerKey string issuerKey string
// ACME Renewal Information, if available
ari acme.RenewalInfo
} }
// Empty returns true if the certificate struct is not filled out; at // Empty returns true if the certificate struct is not filled out; at
@ -67,10 +73,106 @@ func (cert Certificate) Empty() bool {
// Hash returns a checksum of the certificate chain's DER-encoded bytes. // Hash returns a checksum of the certificate chain's DER-encoded bytes.
func (cert Certificate) Hash() string { return cert.hash } func (cert Certificate) Hash() string { return cert.hash }
// NeedsRenewal returns true if the certificate is // NeedsRenewal returns true if the certificate is expiring
// expiring soon (according to cfg) or has expired. // soon (according to ARI and/or cfg) or has expired.
func (cert Certificate) NeedsRenewal(cfg *Config) bool { func (cert Certificate) NeedsRenewal(cfg *Config) bool {
return currentlyInRenewalWindow(cert.Leaf.NotBefore, expiresAt(cert.Leaf), cfg.RenewalWindowRatio) return cfg.certNeedsRenewal(cert.Leaf, cert.ari, true)
}
// certNeedsRenewal consults ACME Renewal Info (ARI) and certificate expiration to determine
// whether the leaf certificate needs to be renewed yet. If true is returned, the certificate
// should be renewed as soon as possible. The reasoning for a true return value is logged
// unless emitLogs is false; this can be useful to suppress noisy logs in the case where you
// first call this to determine if a cert in memory needs renewal, and then right after you
// call it again to see if the cert in storage still needs renewal -- you probably don't want
// to log the second time for checking the cert in storage which is mainly for synchronization.
func (cfg *Config) certNeedsRenewal(leaf *x509.Certificate, ari acme.RenewalInfo, emitLogs bool) bool {
expiration := expiresAt(leaf)
var logger *zap.Logger
if emitLogs {
logger = cfg.Logger.With(
zap.Strings("subjects", leaf.DNSNames),
zap.Time("expiration", expiration),
zap.String("ari_cert_id", ari.UniqueIdentifier),
zap.Timep("next_ari_update", ari.RetryAfter),
zap.Duration("renew_check_interval", cfg.certCache.options.RenewCheckInterval),
zap.Time("window_start", ari.SuggestedWindow.Start),
zap.Time("window_end", ari.SuggestedWindow.End))
} else {
logger = zap.NewNop()
}
// first check ARI: if it says it's time to renew, it's time to renew
// (notice that we don't strictly require an ARI window to also exist; we presume
// that if a time has been selected, a window does or did exist, even if it didn't
// get stored/encoded for some reason - but also: this allows administrators to
// manually or explicitly schedule a renewal time indepedently of ARI which could
// be useful)
selectedTime := ari.SelectedTime
// if, for some reason a random time in the window hasn't been selected yet, but an ARI
// window does exist, we can always improvise one... even if this is called repeatedly,
// a random time is a random time, whether you generate it once or more :D
// (code borrowed from our acme package)
if selectedTime.IsZero() &&
(!ari.SuggestedWindow.Start.IsZero() && !ari.SuggestedWindow.End.IsZero()) {
start, end := ari.SuggestedWindow.Start.Unix()+1, ari.SuggestedWindow.End.Unix()
selectedTime = time.Unix(rand.Int63n(end-start)+start, 0).UTC()
logger.Warn("no renewal time had been selected with ARI; chose an ephemeral one for now",
zap.Time("ephemeral_selected_time", selectedTime))
}
// if a renewal time has been selected, start with that
if !selectedTime.IsZero() {
// ARI spec recommends an algorithm that renews after the randomly-selected
// time OR just before it if the next waking time would be after it; this
// cutoff can actually be before the start of the renewal window, but the spec
// author says that's OK: https://github.com/aarongable/draft-acme-ari/issues/71
cutoff := ari.SelectedTime.Add(-cfg.certCache.options.RenewCheckInterval)
if time.Now().After(cutoff) {
logger.Info("certificate needs renewal based on ARI window",
zap.Time("selected_time", selectedTime),
zap.Time("renewal_cutoff", cutoff))
return true
}
// according to ARI, we are not ready to renew; however, we do not rely solely on
// ARI calculations... what if there is a bug in our implementation, or in the
// server's, or the stored metadata? for redundancy, give credence to the expiration
// date; ignore ARI if we are past a "dangerously close" limit, to avoid any
// possibility of a bug in ARI compromising a site's uptime: we should always always
// always give heed to actual validity period
if currentlyInRenewalWindow(leaf.NotBefore, expiration, 1.0/20.0) {
logger.Warn("certificate is in emergency renewal window; superceding ARI",
zap.Duration("remaining", time.Until(expiration)),
zap.Time("renewal_cutoff", cutoff))
return true
}
}
// the normal check, in the absence of ARI, is to determine if we're near enough (or past)
// the expiration date based on the configured remaining:lifetime ratio
if currentlyInRenewalWindow(leaf.NotBefore, expiration, cfg.RenewalWindowRatio) {
logger.Info("certificate is in configured renewal window based on expiration date",
zap.Duration("remaining", time.Until(expiration)))
return true
}
// finally, if the certificate is expiring imminently, always attempt a renewal;
// we check both a (very low) lifetime ratio and also a strict difference between
// the time until expiration and the interval at which we run the standard maintenance
// routine to check for renewals, to accommodate both exceptionally long and short
// cert lifetimes
if currentlyInRenewalWindow(leaf.NotBefore, expiration, 1.0/50.0) ||
time.Until(expiration) < cfg.certCache.options.RenewCheckInterval*5 {
logger.Warn("certificate is in emergency renewal window; expiration imminent",
zap.Duration("remaining", time.Until(expiration)))
return true
}
return false
} }
// Expired returns true if the certificate has expired. // Expired returns true if the certificate has expired.
@ -85,10 +187,12 @@ func (cert Certificate) Expired() bool {
return time.Now().After(expiresAt(cert.Leaf)) return time.Now().After(expiresAt(cert.Leaf))
} }
// currentlyInRenewalWindow returns true if the current time is // currentlyInRenewalWindow returns true if the current time is within
// within the renewal window, according to the given start/end // (or after) the renewal window, according to the given start/end
// dates and the ratio of the renewal window. If true is returned, // dates and the ratio of the renewal window. If true is returned,
// the certificate being considered is due for renewal. // the certificate being considered is due for renewal. The ratio
// is remaining:total time, i.e. 1/3 = 1/3 of lifetime remaining,
// or 9/10 = 9/10 of time lifetime remaining.
func currentlyInRenewalWindow(notBefore, notAfter time.Time, renewalWindowRatio float64) bool { func currentlyInRenewalWindow(notBefore, notAfter time.Time, renewalWindowRatio float64) bool {
if notAfter.IsZero() { if notAfter.IsZero() {
return false return false
@ -154,9 +258,37 @@ func (cfg *Config) loadManagedCertificate(ctx context.Context, domain string) (C
} }
cert.managed = true cert.managed = true
cert.issuerKey = certRes.issuerKey cert.issuerKey = certRes.issuerKey
if ari, err := certRes.getARI(); err == nil && ari != nil {
cert.ari = *ari
}
return cert, nil return cert, nil
} }
// getARI unpacks ACME Renewal Information from the issuer data, if available.
// It is only an error if there is invalid JSON.
func (certRes CertificateResource) getARI() (*acme.RenewalInfo, error) {
acmeData, err := certRes.getACMEData()
if err != nil {
return nil, err
}
return acmeData.RenewalInfo, nil
}
// getACMEData returns the ACME certificate metadata from the IssuerData, but
// note that a non-ACME-issued certificate may return an empty value and nil
// since the JSON may still decode successfully but just not match any or all
// of the fields. Remember that the IssuerKey is used to store and access the
// cert files in the first place (it is part of the path) so in theory if you
// load a CertificateResource from an ACME issuer it should work as expected.
func (certRes CertificateResource) getACMEData() (acme.Certificate, error) {
if len(certRes.IssuerData) == 0 {
return acme.Certificate{}, nil
}
var acmeCert acme.Certificate
err := json.Unmarshal(certRes.IssuerData, &acmeCert)
return acmeCert, err
}
// CacheUnmanagedCertificatePEMFile loads a certificate for host using certFile // CacheUnmanagedCertificatePEMFile loads a certificate for host using certFile
// and keyFile, which must be in PEM format. It stores the certificate in // and keyFile, which must be in PEM format. It stores the certificate in
// the in-memory cache and returns the hash, useful for removing from the cache. // the in-memory cache and returns the hash, useful for removing from the cache.
@ -329,21 +461,22 @@ func fillCertFromLeaf(cert *Certificate, tlsCert tls.Certificate) error {
return nil return nil
} }
// managedCertInStorageExpiresSoon returns true if cert (being a // managedCertInStorageNeedsRenewal returns true if cert (being a
// managed certificate) is expiring within RenewDurationBefore. // managed certificate) is expiring soon (according to cfg) or if
// It returns false if there was an error checking the expiration // ACME Renewal Information (ARI) is available and says that it is
// of the certificate as found in storage, or if the certificate // time to renew (it uses existing ARI; it does not update it).
// in storage is NOT expiring soon. A certificate that is expiring // It returns false if there was an error, the cert is not expiring
// soon, and ARI window is still future. A certificate that is expiring
// soon in our cache but is not expiring soon in storage probably // soon in our cache but is not expiring soon in storage probably
// means that another instance renewed the certificate in the // means that another instance renewed the certificate in the
// meantime, and it would be a good idea to simply load the cert // meantime, and it would be a good idea to simply load the cert
// into our cache rather than repeating the renewal process again. // into our cache rather than repeating the renewal process again.
func (cfg *Config) managedCertInStorageExpiresSoon(ctx context.Context, cert Certificate) (bool, error) { func (cfg *Config) managedCertInStorageNeedsRenewal(ctx context.Context, cert Certificate) (bool, error) {
certRes, err := cfg.loadCertResourceAnyIssuer(ctx, cert.Names[0]) certRes, err := cfg.loadCertResourceAnyIssuer(ctx, cert.Names[0])
if err != nil { if err != nil {
return false, err return false, err
} }
_, needsRenew := cfg.managedCertNeedsRenewal(certRes) _, _, needsRenew := cfg.managedCertNeedsRenewal(certRes, false)
return needsRenew, nil return needsRenew, nil
} }

View File

@ -39,6 +39,7 @@ import (
"crypto" "crypto"
"crypto/tls" "crypto/tls"
"crypto/x509" "crypto/x509"
"encoding/json"
"fmt" "fmt"
"log" "log"
"net" "net"
@ -388,7 +389,8 @@ type IssuedCertificate struct {
Certificate []byte Certificate []byte
// Any extra information to serialize alongside the // Any extra information to serialize alongside the
// certificate in storage. // certificate in storage. It MUST be serializable
// as JSON in order to be preserved.
Metadata any Metadata any
} }
@ -409,7 +411,7 @@ type CertificateResource struct {
// Any extra information associated with the certificate, // Any extra information associated with the certificate,
// usually provided by the issuer implementation. // usually provided by the issuer implementation.
IssuerData any `json:"issuer_data,omitempty"` IssuerData json.RawMessage `json:"issuer_data,omitempty"`
// The unique string identifying the issuer of the // The unique string identifying the issuer of the
// certificate; internally useful for storage access. // certificate; internally useful for storage access.

View File

@ -52,6 +52,7 @@ type Config struct {
// it should be renewed; for most certificates, the // it should be renewed; for most certificates, the
// global default is good, but for extremely short- // global default is good, but for extremely short-
// lived certs, you may want to raise this to ~0.5. // lived certs, you may want to raise this to ~0.5.
// Ratio is remaining:total lifetime.
RenewalWindowRatio float64 RenewalWindowRatio float64
// An optional event callback clients can set // An optional event callback clients can set
@ -446,6 +447,15 @@ func (cfg *Config) manageOne(ctx context.Context, domainName string, async bool)
return err return err
} }
// ensure ARI is updated before we check whether the cert needs renewing
// (we ignore the second return value because we already check if needs renewing anyway)
if cert.ari.NeedsRefresh() {
cert, _, err = cfg.updateARI(ctx, cert, cfg.Logger)
if err != nil {
cfg.Logger.Error("updating ARI upon managing", zap.Error(err))
}
}
// otherwise, simply renew the certificate if needed // otherwise, simply renew the certificate if needed
if cert.NeedsRenewal(cfg) { if cert.NeedsRenewal(cfg) {
var err error var err error
@ -639,11 +649,15 @@ func (cfg *Config) obtainCert(ctx context.Context, name string, interactive bool
issuerKey := issuerUsed.IssuerKey() issuerKey := issuerUsed.IssuerKey()
// success - immediately save the certificate resource // success - immediately save the certificate resource
metaJSON, err := json.Marshal(issuedCert.Metadata)
if err != nil {
log.Error("unable to encode certificate metadata", zap.Error(err))
}
certRes := CertificateResource{ certRes := CertificateResource{
SANs: namesFromCSR(csr), SANs: namesFromCSR(csr),
CertificatePEM: issuedCert.Certificate, CertificatePEM: issuedCert.Certificate,
PrivateKeyPEM: privKeyPEM, PrivateKeyPEM: privKeyPEM,
IssuerData: issuedCert.Metadata, IssuerData: metaJSON,
issuerKey: issuerUsed.IssuerKey(), issuerKey: issuerUsed.IssuerKey(),
} }
err = cfg.saveCertResource(ctx, issuerUsed, certRes) err = cfg.saveCertResource(ctx, issuerUsed, certRes)
@ -792,7 +806,7 @@ func (cfg *Config) renewCert(ctx context.Context, name string, force, interactiv
} }
// check if renew is still needed - might have been renewed while waiting for lock // check if renew is still needed - might have been renewed while waiting for lock
timeLeft, needsRenew := cfg.managedCertNeedsRenewal(certRes) timeLeft, leaf, needsRenew := cfg.managedCertNeedsRenewal(certRes, false)
if !needsRenew { if !needsRenew {
if force { if force {
log.Info("certificate does not need to be renewed, but renewal is being forced", log.Info("certificate does not need to be renewed, but renewal is being forced",
@ -869,6 +883,18 @@ func (cfg *Config) renewCert(ctx context.Context, name string, force, interactiv
} }
} }
// if we're renewing with the same ACME CA as before, have the ACME
// client tell the server we are replacing a certificate (but doing
// this on the wrong CA, or when the CA doesn't recognize the certID,
// can fail the order)
if acmeData, err := certRes.getACMEData(); err == nil && acmeData.CA != "" {
if acmeIss, ok := issuer.(*ACMEIssuer); ok {
if acmeIss.CA == acmeData.CA {
ctx = context.WithValue(ctx, ctxKeyARIReplaces, leaf)
}
}
}
issuedCert, err = issuer.Issue(ctx, useCSR) issuedCert, err = issuer.Issue(ctx, useCSR)
if err == nil { if err == nil {
issuerUsed = issuer issuerUsed = issuer
@ -902,11 +928,15 @@ func (cfg *Config) renewCert(ctx context.Context, name string, force, interactiv
issuerKey := issuerUsed.IssuerKey() issuerKey := issuerUsed.IssuerKey()
// success - immediately save the renewed certificate resource // success - immediately save the renewed certificate resource
metaJSON, err := json.Marshal(issuedCert.Metadata)
if err != nil {
log.Error("unable to encode certificate metadata", zap.Error(err))
}
newCertRes := CertificateResource{ newCertRes := CertificateResource{
SANs: namesFromCSR(csr), SANs: namesFromCSR(csr),
CertificatePEM: issuedCert.Certificate, CertificatePEM: issuedCert.Certificate,
PrivateKeyPEM: certRes.PrivateKeyPEM, PrivateKeyPEM: certRes.PrivateKeyPEM,
IssuerData: issuedCert.Metadata, IssuerData: metaJSON,
issuerKey: issuerKey, issuerKey: issuerKey,
} }
err = cfg.saveCertResource(ctx, issuerUsed, newCertRes) err = cfg.saveCertResource(ctx, issuerUsed, newCertRes)
@ -1206,14 +1236,19 @@ func (cfg *Config) lockKey(op, domainName string) string {
// managedCertNeedsRenewal returns true if certRes is expiring soon or already expired, // managedCertNeedsRenewal returns true if certRes is expiring soon or already expired,
// or if the process of decoding the cert and checking its expiration returned an error. // or if the process of decoding the cert and checking its expiration returned an error.
func (cfg *Config) managedCertNeedsRenewal(certRes CertificateResource) (time.Duration, bool) { // If there wasn't an error, the leaf cert is also returned, so it can be reused if
// necessary, since we are parsing the PEM bundle anyway.
func (cfg *Config) managedCertNeedsRenewal(certRes CertificateResource, emitLogs bool) (time.Duration, *x509.Certificate, bool) {
certChain, err := parseCertsFromPEMBundle(certRes.CertificatePEM) certChain, err := parseCertsFromPEMBundle(certRes.CertificatePEM)
if err != nil { if err != nil || len(certChain) == 0 {
return 0, true return 0, nil, true
}
var ari acme.RenewalInfo
if ariPtr, err := certRes.getARI(); err == nil && ariPtr != nil {
ari = *ariPtr
} }
remaining := time.Until(expiresAt(certChain[0])) remaining := time.Until(expiresAt(certChain[0]))
needsRenew := currentlyInRenewalWindow(certChain[0].NotBefore, expiresAt(certChain[0]), cfg.RenewalWindowRatio) return remaining, certChain[0], cfg.certNeedsRenewal(certChain[0], ari, emitLogs)
return remaining, needsRenew
} }
func (cfg *Config) emit(ctx context.Context, eventName string, data map[string]any) error { func (cfg *Config) emit(ctx context.Context, eventName string, data map[string]any) error {

View File

@ -15,7 +15,9 @@
package certmagic package certmagic
import ( import (
"bytes"
"context" "context"
"encoding/json"
"os" "os"
"reflect" "reflect"
"testing" "testing"
@ -51,9 +53,9 @@ func TestSaveCertResource(t *testing.T) {
SANs: []string{domain}, SANs: []string{domain},
PrivateKeyPEM: []byte(keyContents), PrivateKeyPEM: []byte(keyContents),
CertificatePEM: []byte(certContents), CertificatePEM: []byte(certContents),
IssuerData: &acme.Certificate{ IssuerData: mustJSON(acme.Certificate{
URL: "https://example.com/cert", URL: "https://example.com/cert",
}, }),
issuerKey: am.IssuerKey(), issuerKey: am.IssuerKey(),
} }
@ -62,17 +64,22 @@ func TestSaveCertResource(t *testing.T) {
t.Fatalf("Expected no error, got: %v", err) t.Fatalf("Expected no error, got: %v", err)
} }
// the result of our test will be a map, since we have
// no choice but to decode it into an 'any' interface
cert.IssuerData = map[string]any{
"url": "https://example.com/cert",
}
siteData, err := testConfig.loadCertResource(ctx, am, domain) siteData, err := testConfig.loadCertResource(ctx, am, domain)
if err != nil { if err != nil {
t.Fatalf("Expected no error reading site, got: %v", err) t.Fatalf("Expected no error reading site, got: %v", err)
} }
siteData.IssuerData = bytes.ReplaceAll(siteData.IssuerData, []byte("\t"), []byte(""))
siteData.IssuerData = bytes.ReplaceAll(siteData.IssuerData, []byte("\n"), []byte(""))
siteData.IssuerData = bytes.ReplaceAll(siteData.IssuerData, []byte(" "), []byte(""))
if !reflect.DeepEqual(cert, siteData) { if !reflect.DeepEqual(cert, siteData) {
t.Errorf("Expected '%+v' to match '%+v'", cert, siteData) t.Errorf("Expected '%+v' to match '%+v'\n%s\n%s", cert.IssuerData, siteData.IssuerData, string(cert.IssuerData), string(siteData.IssuerData))
} }
} }
func mustJSON(val any) []byte {
result, err := json.Marshal(val)
if err != nil {
panic("marshaling JSON: " + err.Error())
}
return result
}

2
go.mod
View File

@ -8,7 +8,7 @@ require (
github.com/caddyserver/zerossl v0.1.2 github.com/caddyserver/zerossl v0.1.2
github.com/klauspost/cpuid/v2 v2.2.7 github.com/klauspost/cpuid/v2 v2.2.7
github.com/libdns/libdns v0.2.2 github.com/libdns/libdns v0.2.2
github.com/mholt/acmez/v2 v2.0.0 github.com/mholt/acmez/v2 v2.0.1-0.20240506200913-5a16e768dea9
github.com/miekg/dns v1.1.58 github.com/miekg/dns v1.1.58
github.com/zeebo/blake3 v0.2.3 github.com/zeebo/blake3 v0.2.3
go.uber.org/zap v1.27.0 go.uber.org/zap v1.27.0

4
go.sum
View File

@ -7,8 +7,8 @@ github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuV
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/libdns/libdns v0.2.2 h1:O6ws7bAfRPaBsgAYt8MDe2HcNBGC29hkZ9MX2eUSX3s= github.com/libdns/libdns v0.2.2 h1:O6ws7bAfRPaBsgAYt8MDe2HcNBGC29hkZ9MX2eUSX3s=
github.com/libdns/libdns v0.2.2/go.mod h1:4Bj9+5CQiNMVGf87wjX4CY3HQJypUHRuLvlsfsZqLWQ= github.com/libdns/libdns v0.2.2/go.mod h1:4Bj9+5CQiNMVGf87wjX4CY3HQJypUHRuLvlsfsZqLWQ=
github.com/mholt/acmez/v2 v2.0.0 h1:FsGoEdA5mPhAaGfrwIEM13jnn2hn3jU6Vny+RWypt3o= github.com/mholt/acmez/v2 v2.0.1-0.20240506200913-5a16e768dea9 h1:b8yqT+RDihRjjBRq2FWtnduFHqPlpyrKM4ZH0UatgKM=
github.com/mholt/acmez/v2 v2.0.0/go.mod h1:fX4c9r5jYwMyMsC+7tkYRxHibkOTgta5DIFGoe67e1U= github.com/mholt/acmez/v2 v2.0.1-0.20240506200913-5a16e768dea9/go.mod h1:fX4c9r5jYwMyMsC+7tkYRxHibkOTgta5DIFGoe67e1U=
github.com/miekg/dns v1.1.58 h1:ca2Hdkz+cDg/7eNF6V56jjzuZ4aCAE+DbVkILdQWG/4= github.com/miekg/dns v1.1.58 h1:ca2Hdkz+cDg/7eNF6V56jjzuZ4aCAE+DbVkILdQWG/4=
github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPkBY= github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPkBY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

View File

@ -546,11 +546,11 @@ func (cfg *Config) obtainOnDemandCertificate(ctx context.Context, hello *tls.Cli
// //
// This function is safe for use by multiple concurrent goroutines. // This function is safe for use by multiple concurrent goroutines.
func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHelloInfo, cert Certificate) (Certificate, error) { func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHelloInfo, cert Certificate) (Certificate, error) {
log := cfg.Logger.Named("on_demand") logger := cfg.Logger.Named("on_demand")
// Check OCSP staple validity // Check OCSP staple validity
if cert.ocsp != nil && !freshOCSP(cert.ocsp) { if cert.ocsp != nil && !freshOCSP(cert.ocsp) {
log.Debug("OCSP response needs refreshing", logger.Debug("OCSP response needs refreshing",
zap.Strings("identifiers", cert.Names), zap.Strings("identifiers", cert.Names),
zap.Int("ocsp_status", cert.ocsp.Status), zap.Int("ocsp_status", cert.ocsp.Status),
zap.Time("this_update", cert.ocsp.ThisUpdate), zap.Time("this_update", cert.ocsp.ThisUpdate),
@ -560,12 +560,12 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe
if err != nil { if err != nil {
// An error with OCSP stapling is not the end of the world, and in fact, is // An error with OCSP stapling is not the end of the world, and in fact, is
// quite common considering not all certs have issuer URLs that support it. // quite common considering not all certs have issuer URLs that support it.
log.Warn("stapling OCSP", logger.Warn("stapling OCSP",
zap.String("server_name", hello.ServerName), zap.String("server_name", hello.ServerName),
zap.Strings("sans", cert.Names), zap.Strings("sans", cert.Names),
zap.Error(err)) zap.Error(err))
} else { } else {
log.Debug("successfully stapled new OCSP response", logger.Debug("successfully stapled new OCSP response",
zap.Strings("identifiers", cert.Names), zap.Strings("identifiers", cert.Names),
zap.Int("ocsp_status", cert.ocsp.Status), zap.Int("ocsp_status", cert.ocsp.Status),
zap.Time("this_update", cert.ocsp.ThisUpdate), zap.Time("this_update", cert.ocsp.ThisUpdate),
@ -578,10 +578,20 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe
cfg.certCache.mu.Unlock() cfg.certCache.mu.Unlock()
} }
// Check ARI status
if cert.ari.NeedsRefresh() {
// we ignore the second return value here because we go on to check renewal status below regardless
var err error
cert, _, err = cfg.updateARI(ctx, cert, logger)
if err != nil {
logger.Error("updated ARI", zap.Error(err))
}
}
// We attempt to replace any certificates that were revoked. // We attempt to replace any certificates that were revoked.
// Crucially, this happens OUTSIDE a lock on the certCache. // Crucially, this happens OUTSIDE a lock on the certCache.
if certShouldBeForceRenewed(cert) { if certShouldBeForceRenewed(cert) {
log.Warn("on-demand certificate's OCSP status is REVOKED; will try to forcefully renew", logger.Warn("on-demand certificate's OCSP status is REVOKED; will try to forcefully renew",
zap.Strings("identifiers", cert.Names), zap.Strings("identifiers", cert.Names),
zap.Int("ocsp_status", cert.ocsp.Status), zap.Int("ocsp_status", cert.ocsp.Status),
zap.Time("revoked_at", cert.ocsp.RevokedAt), zap.Time("revoked_at", cert.ocsp.RevokedAt),
@ -591,14 +601,13 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe
} }
// Check cert expiration // Check cert expiration
if currentlyInRenewalWindow(cert.Leaf.NotBefore, expiresAt(cert.Leaf), cfg.RenewalWindowRatio) { if cfg.certNeedsRenewal(cert.Leaf, cert.ari, true) {
// Check if the certificate still exists on disk. If not, we need to obtain a new one. // Check if the certificate still exists on disk. If not, we need to obtain a new one.
// This can happen if the certificate was cleaned up by the storage cleaner, but still // This can happen if the certificate was cleaned up by the storage cleaner, but still
// remains in the in-memory cache. // remains in the in-memory cache.
if !cfg.storageHasCertResourcesAnyIssuer(ctx, cert.Names[0]) { if !cfg.storageHasCertResourcesAnyIssuer(ctx, cert.Names[0]) {
log.Debug("certificate not found on disk; obtaining new certificate", logger.Debug("certificate not found on disk; obtaining new certificate",
zap.Strings("identifiers", cert.Names)) zap.Strings("identifiers", cert.Names))
return cfg.obtainOnDemandCertificate(ctx, hello) return cfg.obtainOnDemandCertificate(ctx, hello)
} }
// Otherwise, renew the certificate. // Otherwise, renew the certificate.
@ -620,7 +629,7 @@ func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHe
// //
// This function is safe for use by multiple concurrent goroutines. // This function is safe for use by multiple concurrent goroutines.
func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.ClientHelloInfo, currentCert Certificate) (Certificate, error) { func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.ClientHelloInfo, currentCert Certificate) (Certificate, error) {
log := logWithRemote(cfg.Logger.Named("on_demand"), hello) logger := logWithRemote(cfg.Logger.Named("on_demand"), hello)
name := cfg.getNameFromClientHello(hello) name := cfg.getNameFromClientHello(hello)
timeLeft := time.Until(expiresAt(currentCert.Leaf)) timeLeft := time.Until(expiresAt(currentCert.Leaf))
@ -637,7 +646,7 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
// renewing it, so we might as well serve what we have without blocking, UNLESS // renewing it, so we might as well serve what we have without blocking, UNLESS
// we're forcing renewal, in which case the current certificate is not usable // we're forcing renewal, in which case the current certificate is not usable
if timeLeft > 0 && !revoked { if timeLeft > 0 && !revoked {
log.Debug("certificate expires soon but is already being renewed; serving current certificate", logger.Debug("certificate expires soon but is already being renewed; serving current certificate",
zap.Strings("subjects", currentCert.Names), zap.Strings("subjects", currentCert.Names),
zap.Duration("remaining", timeLeft)) zap.Duration("remaining", timeLeft))
return currentCert, nil return currentCert, nil
@ -646,7 +655,7 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
// otherwise, we'll have to wait for the renewal to finish so we don't serve // otherwise, we'll have to wait for the renewal to finish so we don't serve
// a revoked or expired certificate // a revoked or expired certificate
log.Debug("certificate has expired, but is already being renewed; waiting for renewal to complete", logger.Debug("certificate has expired, but is already being renewed; waiting for renewal to complete",
zap.Strings("subjects", currentCert.Names), zap.Strings("subjects", currentCert.Names),
zap.Time("expired", expiresAt(currentCert.Leaf)), zap.Time("expired", expiresAt(currentCert.Leaf)),
zap.Bool("revoked", revoked)) zap.Bool("revoked", revoked))
@ -677,7 +686,7 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
obtainCertWaitChansMu.Unlock() obtainCertWaitChansMu.Unlock()
} }
log = log.With( logger = logger.With(
zap.String("server_name", name), zap.String("server_name", name),
zap.Strings("subjects", currentCert.Names), zap.Strings("subjects", currentCert.Names),
zap.Time("expiration", expiresAt(currentCert.Leaf)), zap.Time("expiration", expiresAt(currentCert.Leaf)),
@ -698,19 +707,19 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
cfg.certCache.mu.Unlock() cfg.certCache.mu.Unlock()
unblockWaiters() unblockWaiters()
if log != nil { if logger != nil {
log.Error("certificate should not be obtained", zap.Error(err)) logger.Error("certificate should not be obtained", zap.Error(err))
} }
return Certificate{}, err return Certificate{}, err
} }
log.Info("attempting certificate renewal") logger.Info("attempting certificate renewal")
// otherwise, renew with issuer, etc. // otherwise, renew with issuer, etc.
var newCert Certificate var newCert Certificate
if revoked { if revoked {
newCert, err = cfg.forceRenew(ctx, log, currentCert) newCert, err = cfg.forceRenew(ctx, logger, currentCert)
} else { } else {
err = cfg.RenewCertAsync(ctx, name, false) err = cfg.RenewCertAsync(ctx, name, false)
if err == nil { if err == nil {
@ -725,7 +734,7 @@ func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.Clien
unblockWaiters() unblockWaiters()
if err != nil { if err != nil {
log.Error("renewing and reloading certificate", zap.String("server_name", name), zap.Error(err)) logger.Error("renewing and reloading certificate", zap.String("server_name", name), zap.Error(err))
} }
return newCert, err return newCert, err

View File

@ -92,7 +92,7 @@ func (certCache *Cache) maintainAssets(panicCount int) {
func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error { func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
log := certCache.logger.Named("maintenance") log := certCache.logger.Named("maintenance")
// configs will hold a map of certificate name to the config // configs will hold a map of certificate hash to the config
// to use when managing that certificate // to use when managing that certificate
configs := make(map[string]*Config) configs := make(map[string]*Config)
@ -102,7 +102,7 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
// words, our first iteration through the certificate cache does NOT // words, our first iteration through the certificate cache does NOT
// perform any operations--only queues them--so that more fine-grained // perform any operations--only queues them--so that more fine-grained
// write locks may be obtained during the actual operations. // write locks may be obtained during the actual operations.
var renewQueue, reloadQueue, deleteQueue []Certificate var renewQueue, reloadQueue, deleteQueue, ariQueue certList
certCache.mu.RLock() certCache.mu.RLock()
for certKey, cert := range certCache.cache { for certKey, cert := range certCache.cache {
@ -135,22 +135,28 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
continue continue
} }
// ACME-specific: see if if ACME Renewal Info (ARI) window needs refreshing
if cert.ari.NeedsRefresh() {
configs[cert.hash] = cfg
ariQueue = append(ariQueue, cert)
}
// if time is up or expires soon, we need to try to renew it // if time is up or expires soon, we need to try to renew it
if cert.NeedsRenewal(cfg) { if cert.NeedsRenewal(cfg) {
configs[cert.Names[0]] = cfg configs[cert.hash] = cfg
// see if the certificate in storage has already been renewed, possibly by another // see if the certificate in storage has already been renewed, possibly by another
// instance that didn't coordinate with this one; if so, just load it (this // instance that didn't coordinate with this one; if so, just load it (this
// might happen if another instance already renewed it - kinda sloppy but checking disk // might happen if another instance already renewed it - kinda sloppy but checking disk
// first is a simple way to possibly drastically reduce rate limit problems) // first is a simple way to possibly drastically reduce rate limit problems)
storedCertExpiring, err := cfg.managedCertInStorageExpiresSoon(ctx, cert) storedCertNeedsRenew, err := cfg.managedCertInStorageNeedsRenewal(ctx, cert)
if err != nil { if err != nil {
// hmm, weird, but not a big deal, maybe it was deleted or something // hmm, weird, but not a big deal, maybe it was deleted or something
log.Warn("error while checking if stored certificate is also expiring soon", log.Warn("error while checking if stored certificate is also expiring soon",
zap.Strings("identifiers", cert.Names), zap.Strings("identifiers", cert.Names),
zap.Error(err)) zap.Error(err))
} else if !storedCertExpiring { } else if !storedCertNeedsRenew {
// if the certificate is NOT expiring soon and there was no error, then we // if the certificate does NOT need renewal and there was no error, then we
// are good to just reload the certificate from storage instead of repeating // are good to just reload the certificate from storage instead of repeating
// a likely-unnecessary renewal procedure // a likely-unnecessary renewal procedure
reloadQueue = append(reloadQueue, cert) reloadQueue = append(reloadQueue, cert)
@ -161,11 +167,30 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
// NOTE: It is super-important to note that the TLS-ALPN challenge requires // NOTE: It is super-important to note that the TLS-ALPN challenge requires
// a write lock on the cache in order to complete its challenge, so it is extra // a write lock on the cache in order to complete its challenge, so it is extra
// vital that this renew operation does not happen inside our read lock! // vital that this renew operation does not happen inside our read lock!
renewQueue = append(renewQueue, cert) renewQueue.insert(cert)
} }
} }
certCache.mu.RUnlock() certCache.mu.RUnlock()
// Update ARI, and then for any certs where the ARI window changed,
// be sure to queue them for renewal if necessary
for _, cert := range ariQueue {
cfg := configs[cert.hash]
cert, changed, err := cfg.updateARI(ctx, cert, log)
if err != nil {
log.Error("updating ARI", zap.Error(err))
}
if changed && cert.NeedsRenewal(cfg) {
// it's theoretically possible that another instance already got the memo
// on the changed ARI and even renewed the cert already, and thus doing it
// here is wasteful, but I have never heard of this happening in reality,
// so to save some cycles for now I think we'll just queue it for renewal
// (notice how we use 'insert' to avoid duplicates, in case it was already
// scheduled for renewal anyway)
renewQueue.insert(cert)
}
}
// Reload certificates that merely need to be updated in memory // Reload certificates that merely need to be updated in memory
for _, oldCert := range reloadQueue { for _, oldCert := range reloadQueue {
timeLeft := expiresAt(oldCert.Leaf).Sub(time.Now().UTC()) timeLeft := expiresAt(oldCert.Leaf).Sub(time.Now().UTC())
@ -173,7 +198,7 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
zap.Strings("identifiers", oldCert.Names), zap.Strings("identifiers", oldCert.Names),
zap.Duration("remaining", timeLeft)) zap.Duration("remaining", timeLeft))
cfg := configs[oldCert.Names[0]] cfg := configs[oldCert.hash]
// crucially, this happens OUTSIDE a lock on the certCache // crucially, this happens OUTSIDE a lock on the certCache
_, err := cfg.reloadManagedCertificate(ctx, oldCert) _, err := cfg.reloadManagedCertificate(ctx, oldCert)
@ -187,7 +212,7 @@ func (certCache *Cache) RenewManagedCertificates(ctx context.Context) error {
// Renewal queue // Renewal queue
for _, oldCert := range renewQueue { for _, oldCert := range renewQueue {
cfg := configs[oldCert.Names[0]] cfg := configs[oldCert.hash]
err := certCache.queueRenewalTask(ctx, oldCert, cfg) err := certCache.queueRenewalTask(ctx, oldCert, cfg)
if err != nil { if err != nil {
log.Error("queueing renewal task", log.Error("queueing renewal task",
@ -390,6 +415,171 @@ func (certCache *Cache) updateOCSPStaples(ctx context.Context) {
} }
} }
// storageHasNewerARI returns true if the configured storage has ARI that is newer
// than that of a certificate that is already loaded, along with the value from
// storage.
func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bool, acme.RenewalInfo, error) {
storedCertData, err := cfg.loadStoredACMECertificateMetadata(ctx, cert)
if err != nil || storedCertData.RenewalInfo == nil {
return false, acme.RenewalInfo{}, err
}
// prefer stored info if it has a window and the loaded one doesn't,
// or if the one in storage has a later RetryAfter (though I suppose
// it's not guaranteed, typically those will move forward in time)
if (!cert.ari.HasWindow() && storedCertData.RenewalInfo.HasWindow()) ||
storedCertData.RenewalInfo.RetryAfter.After(*cert.ari.RetryAfter) {
return true, *storedCertData.RenewalInfo, nil
}
return false, acme.RenewalInfo{}, nil
}
// loadStoredACMECertificateMetadata loads the stored ACME certificate data
// from the cert's sidecar JSON file.
func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) {
metaBytes, err := cfg.Storage.Load(ctx, StorageKeys.SiteMeta(cert.issuerKey, cert.Names[0]))
if err != nil {
return acme.Certificate{}, fmt.Errorf("loading cert metadata: %w", err)
}
var certRes CertificateResource
if err = json.Unmarshal(metaBytes, &certRes); err != nil {
return acme.Certificate{}, fmt.Errorf("unmarshaling cert metadata: %w", err)
}
var acmeCert acme.Certificate
if err = json.Unmarshal(certRes.IssuerData, &acmeCert); err != nil {
return acme.Certificate{}, fmt.Errorf("unmarshaling potential ACME issuer metadata: %v", err)
}
return acmeCert, nil
}
// updateARI updates the cert's ACME renewal info, first by checking storage for a newer
// one, or getting it from the CA if needed. The updated info is stored in storage and
// updated in the cache. The certificate with the updated ARI is returned. If true is
// returned, the ARI window or selected time has changed, and the caller should check if
// the cert needs to be renewed now, even if there is an error.
func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) {
logger = logger.With(
zap.Strings("identifiers", cert.Names),
zap.String("cert_hash", cert.hash),
zap.String("ari_unique_id", cert.ari.UniqueIdentifier),
zap.Time("cert_expiry", cert.Leaf.NotAfter))
updatedCert = cert
oldARI := cert.ari
// see if the stored value has been refreshed already by another instance
gotNewARI, newARI, err := cfg.storageHasNewerARI(ctx, cert)
// when we're all done, log if something about the schedule is different
// ("WARN" level because ARI window changing may be a sign of external trouble
// and we want to draw their attention to a potential explanation URL)
defer func() {
changed = !newARI.SameWindow(oldARI)
if changed {
logger.Warn("ARI window or selected renewal time changed",
zap.Time("prev_start", oldARI.SuggestedWindow.Start),
zap.Time("next_start", newARI.SuggestedWindow.Start),
zap.Time("prev_end", oldARI.SuggestedWindow.End),
zap.Time("next_end", newARI.SuggestedWindow.End),
zap.Time("prev_selected_time", oldARI.SelectedTime),
zap.Time("next_selected_time", newARI.SelectedTime),
zap.String("explanation_url", newARI.ExplanationURL))
}
}()
if err == nil && gotNewARI {
// great, storage has a newer one we can use
cfg.certCache.mu.Lock()
updatedCert = cfg.certCache.cache[cert.hash]
updatedCert.ari = newARI
cfg.certCache.cache[cert.hash] = updatedCert
cfg.certCache.mu.Unlock()
logger.Info("reloaded ARI with newer one in storage",
zap.Timep("next_refresh", newARI.RetryAfter),
zap.Time("renewal_time", newARI.SelectedTime))
return
}
if err != nil {
logger.Error("error while checking storage for updated ARI; updating ARI now", zap.Error(err))
}
// of the issuers configured, hopefully one of them is the ACME CA we got the cert from
for _, iss := range cfg.Issuers {
if acmeIss, ok := iss.(*ACMEIssuer); ok {
newARI, err = acmeIss.getRenewalInfo(ctx, cert) // be sure to use existing newARI variable so we can compare against old value in the defer
if err != nil {
// could be anything, but a common error might simply be the "wrong" ACME CA
// (meaning, different from the one that issued the cert, thus the only one
// that would have any ARI for it) if multiple ACME CAs are configured
logger.Error("failed updating renewal info from ACME CA",
zap.String("issuer", iss.IssuerKey()),
zap.Error(err))
continue
}
// when we get the latest ARI, the acme package will select a time within the window
// for us; of course, since it's random, it's likely different from the previously-
// selected time; but if the window doesn't change, there's no need to change the
// selected time (the acme package doesn't know the previous window to know better)
// ... so if the window hasn't changed we'll just put back the selected time
if newARI.SameWindow(oldARI) && !oldARI.SelectedTime.IsZero() {
newARI.SelectedTime = oldARI.SelectedTime
}
// then store the updated ARI (even if the window didn't change, the Retry-After
// likely did) in cache and storage
// be sure we get the cert from the cache while inside a lock to avoid logical races
cfg.certCache.mu.Lock()
updatedCert = cfg.certCache.cache[cert.hash]
updatedCert.ari = newARI
cfg.certCache.cache[cert.hash] = updatedCert
cfg.certCache.mu.Unlock()
// update the ARI value in storage
var certData acme.Certificate
certData, err = cfg.loadStoredACMECertificateMetadata(ctx, cert)
if err != nil {
err = fmt.Errorf("got new ARI from %s, but failed loading stored certificate metadata: %v", iss.IssuerKey(), err)
return
}
certData.RenewalInfo = &newARI
var certDataBytes, certResBytes []byte
certDataBytes, err = json.Marshal(certData)
if err != nil {
err = fmt.Errorf("got new ARI from %s, but failed marshaling certificate ACME metadata: %v", iss.IssuerKey(), err)
return
}
certResBytes, err = json.MarshalIndent(CertificateResource{
SANs: cert.Names,
IssuerData: certDataBytes,
}, "", "\t")
if err != nil {
err = fmt.Errorf("got new ARI from %s, but could not re-encode certificate metadata: %v", iss.IssuerKey(), err)
return
}
if err = cfg.Storage.Store(ctx, StorageKeys.SiteMeta(cert.issuerKey, cert.Names[0]), certResBytes); err != nil {
err = fmt.Errorf("got new ARI from %s, but could not store it with certificate metadata: %v", iss.IssuerKey(), err)
return
}
logger.Info("updated ACME renewal information",
zap.Time("selected_time", newARI.SelectedTime),
zap.Timep("next_update", newARI.RetryAfter),
zap.String("explanation_url", newARI.ExplanationURL))
return
}
}
err = fmt.Errorf("could not fully update ACME renewal info: either no ACME issuer configured for certificate, or all failed (make sure the ACME CA that issued the certificate is configured)")
return
}
// CleanStorageOptions specifies how to clean up a storage unit. // CleanStorageOptions specifies how to clean up a storage unit.
type CleanStorageOptions struct { type CleanStorageOptions struct {
// Optional custom logger. // Optional custom logger.
@ -725,6 +915,19 @@ func certShouldBeForceRenewed(cert Certificate) bool {
cert.ocsp.Status == ocsp.Revoked cert.ocsp.Status == ocsp.Revoked
} }
type certList []Certificate
// insert appends cert to the list if it is not already in the list.
// Efficiency: O(n)
func (certs *certList) insert(cert Certificate) {
for _, c := range *certs {
if c.hash == cert.hash {
return
}
}
*certs = append(*certs, cert)
}
const ( const (
// DefaultRenewCheckInterval is how often to check certificates for expiration. // DefaultRenewCheckInterval is how often to check certificates for expiration.
// Scans are very lightweight, so this can be semi-frequent. This default should // Scans are very lightweight, so this can be semi-frequent. This default should

View File

@ -258,7 +258,11 @@ func (iss *ZeroSSLIssuer) Revoke(ctx context.Context, cert CertificateResource,
default: default:
return fmt.Errorf("unsupported reason: %d", reason) return fmt.Errorf("unsupported reason: %d", reason)
} }
return iss.getClient().RevokeCertificate(ctx, cert.IssuerData.(zerossl.CertificateObject).ID, r) var certObj zerossl.CertificateObject
if err := json.Unmarshal(cert.IssuerData, &certObj); err != nil {
return err
}
return iss.getClient().RevokeCertificate(ctx, certObj.ID, r)
} }
func (iss *ZeroSSLIssuer) getDistributedValidationInfo(ctx context.Context, identifier string) (acme.Challenge, bool, error) { func (iss *ZeroSSLIssuer) getDistributedValidationInfo(ctx context.Context, identifier string) (acme.Challenge, bool, error) {