5ee48a3108
This may be temporary until ARI is more mature
949 lines
35 KiB
Go
949 lines
35 KiB
Go
// Copyright 2015 Matthew Holt
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package certmagic
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"net"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mholt/acmez/v2"
|
|
"go.uber.org/zap"
|
|
"golang.org/x/crypto/ocsp"
|
|
)
|
|
|
|
// GetCertificate gets a certificate to satisfy clientHello. In getting
|
|
// the certificate, it abides the rules and settings defined in the Config
|
|
// that matches clientHello.ServerName. It tries to get certificates in
|
|
// this order:
|
|
//
|
|
// 1. Exact match in the in-memory cache
|
|
// 2. Wildcard match in the in-memory cache
|
|
// 3. Managers (if any)
|
|
// 4. Storage (if on-demand is enabled)
|
|
// 5. Issuers (if on-demand is enabled)
|
|
//
|
|
// This method is safe for use as a tls.Config.GetCertificate callback.
|
|
//
|
|
// GetCertificate will run in a new context, use GetCertificateWithContext to provide
|
|
// a context.
|
|
func (cfg *Config) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
|
return cfg.GetCertificateWithContext(clientHello.Context(), clientHello)
|
|
}
|
|
|
|
func (cfg *Config) GetCertificateWithContext(ctx context.Context, clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
|
if err := cfg.emit(ctx, "tls_get_certificate", map[string]any{"client_hello": clientHelloWithoutConn(clientHello)}); err != nil {
|
|
cfg.Logger.Error("TLS handshake aborted by event handler",
|
|
zap.String("server_name", clientHello.ServerName),
|
|
zap.String("remote", clientHello.Conn.RemoteAddr().String()),
|
|
zap.Error(err))
|
|
return nil, fmt.Errorf("handshake aborted by event handler: %w", err)
|
|
}
|
|
|
|
if ctx == nil {
|
|
// tests can't set context on a tls.ClientHelloInfo because it's unexported :(
|
|
ctx = context.Background()
|
|
}
|
|
ctx = context.WithValue(ctx, ClientHelloInfoCtxKey, clientHello)
|
|
|
|
// special case: serve up the certificate for a TLS-ALPN ACME challenge
|
|
// (https://www.rfc-editor.org/rfc/rfc8737.html)
|
|
// "The ACME server MUST provide an ALPN extension with the single protocol
|
|
// name "acme-tls/1" and an SNI extension containing only the domain name
|
|
// being validated during the TLS handshake."
|
|
if clientHello.ServerName != "" &&
|
|
len(clientHello.SupportedProtos) == 1 &&
|
|
clientHello.SupportedProtos[0] == acmez.ACMETLS1Protocol {
|
|
challengeCert, distributed, err := cfg.getTLSALPNChallengeCert(clientHello)
|
|
if err != nil {
|
|
cfg.Logger.Error("tls-alpn challenge",
|
|
zap.String("remote_addr", clientHello.Conn.RemoteAddr().String()),
|
|
zap.String("server_name", clientHello.ServerName),
|
|
zap.Error(err))
|
|
return nil, err
|
|
}
|
|
cfg.Logger.Info("served key authentication certificate",
|
|
zap.String("server_name", clientHello.ServerName),
|
|
zap.String("challenge", "tls-alpn-01"),
|
|
zap.String("remote", clientHello.Conn.RemoteAddr().String()),
|
|
zap.Bool("distributed", distributed))
|
|
return challengeCert, nil
|
|
}
|
|
|
|
// get the certificate and serve it up
|
|
cert, err := cfg.getCertDuringHandshake(ctx, clientHello, true)
|
|
|
|
return &cert.Certificate, err
|
|
}
|
|
|
|
// getCertificateFromCache gets a certificate that matches name from the in-memory
|
|
// cache, according to the lookup table associated with cfg. The lookup then
|
|
// points to a certificate in the Instance certificate cache.
|
|
//
|
|
// The name is expected to already be normalized (e.g. lowercased).
|
|
//
|
|
// If there is no exact match for name, it will be checked against names of
|
|
// the form '*.example.com' (wildcard certificates) according to RFC 6125.
|
|
// If a match is found, matched will be true. If no matches are found, matched
|
|
// will be false and a "default" certificate will be returned with defaulted
|
|
// set to true. If defaulted is false, then no certificates were available.
|
|
//
|
|
// The logic in this function is adapted from the Go standard library,
|
|
// which is by the Go Authors.
|
|
//
|
|
// This function is safe for concurrent use.
|
|
func (cfg *Config) getCertificateFromCache(hello *tls.ClientHelloInfo) (cert Certificate, matched, defaulted bool) {
|
|
name := normalizedName(hello.ServerName)
|
|
|
|
if name == "" {
|
|
// if SNI is empty, prefer matching IP address
|
|
if hello.Conn != nil {
|
|
addr := localIPFromConn(hello.Conn)
|
|
cert, matched = cfg.selectCert(hello, addr)
|
|
if matched {
|
|
return
|
|
}
|
|
}
|
|
|
|
// use a "default" certificate by name, if specified
|
|
if cfg.DefaultServerName != "" {
|
|
normDefault := normalizedName(cfg.DefaultServerName)
|
|
cert, defaulted = cfg.selectCert(hello, normDefault)
|
|
if defaulted {
|
|
return
|
|
}
|
|
}
|
|
} else {
|
|
// if SNI is specified, try an exact match first
|
|
cert, matched = cfg.selectCert(hello, name)
|
|
if matched {
|
|
return
|
|
}
|
|
|
|
// try replacing labels in the name with
|
|
// wildcards until we get a match
|
|
labels := strings.Split(name, ".")
|
|
for i := range labels {
|
|
labels[i] = "*"
|
|
candidate := strings.Join(labels, ".")
|
|
cert, matched = cfg.selectCert(hello, candidate)
|
|
if matched {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// a fallback server name can be tried in the very niche
|
|
// case where a client sends one SNI value but expects or
|
|
// accepts a different one in return (this is sometimes
|
|
// the case with CDNs like Cloudflare that send the
|
|
// downstream ServerName in the handshake but accept
|
|
// the backend origin's true hostname in a cert).
|
|
if cfg.FallbackServerName != "" {
|
|
normFallback := normalizedName(cfg.FallbackServerName)
|
|
cert, defaulted = cfg.selectCert(hello, normFallback)
|
|
if defaulted {
|
|
return
|
|
}
|
|
}
|
|
|
|
// otherwise, we're bingo on ammo; see issues
|
|
// caddyserver/caddy#2035 and caddyserver/caddy#1303 (any
|
|
// change to certificate matching behavior must
|
|
// account for hosts defined where the hostname
|
|
// is empty or a catch-all, like ":443" or
|
|
// "0.0.0.0:443")
|
|
|
|
return
|
|
}
|
|
|
|
// selectCert uses hello to select a certificate from the
|
|
// cache for name. If cfg.CertSelection is set, it will be
|
|
// used to make the decision. Otherwise, the first matching
|
|
// unexpired cert is returned. As a special case, if no
|
|
// certificates match name and cfg.CertSelection is set,
|
|
// then all certificates in the cache will be passed in
|
|
// for the cfg.CertSelection to make the final decision.
|
|
func (cfg *Config) selectCert(hello *tls.ClientHelloInfo, name string) (Certificate, bool) {
|
|
logger := cfg.Logger.Named("handshake")
|
|
choices := cfg.certCache.getAllMatchingCerts(name)
|
|
|
|
if len(choices) == 0 {
|
|
if cfg.CertSelection == nil {
|
|
logger.Debug("no matching certificates and no custom selection logic", zap.String("identifier", name))
|
|
return Certificate{}, false
|
|
}
|
|
logger.Debug("no matching certificate; will choose from all certificates", zap.String("identifier", name))
|
|
choices = cfg.certCache.getAllCerts()
|
|
}
|
|
|
|
logger.Debug("choosing certificate",
|
|
zap.String("identifier", name),
|
|
zap.Int("num_choices", len(choices)))
|
|
|
|
if cfg.CertSelection == nil {
|
|
cert, err := DefaultCertificateSelector(hello, choices)
|
|
logger.Debug("default certificate selection results",
|
|
zap.Error(err),
|
|
zap.String("identifier", name),
|
|
zap.Strings("subjects", cert.Names),
|
|
zap.Bool("managed", cert.managed),
|
|
zap.String("issuer_key", cert.issuerKey),
|
|
zap.String("hash", cert.hash))
|
|
return cert, err == nil
|
|
}
|
|
|
|
cert, err := cfg.CertSelection.SelectCertificate(hello, choices)
|
|
|
|
logger.Debug("custom certificate selection results",
|
|
zap.Error(err),
|
|
zap.String("identifier", name),
|
|
zap.Strings("subjects", cert.Names),
|
|
zap.Bool("managed", cert.managed),
|
|
zap.String("issuer_key", cert.issuerKey),
|
|
zap.String("hash", cert.hash))
|
|
|
|
return cert, err == nil
|
|
}
|
|
|
|
// DefaultCertificateSelector is the default certificate selection logic
|
|
// given a choice of certificates. If there is at least one certificate in
|
|
// choices, it always returns a certificate without error. It chooses the
|
|
// first non-expired certificate that the client supports if possible,
|
|
// otherwise it returns an expired certificate that the client supports,
|
|
// otherwise it just returns the first certificate in the list of choices.
|
|
func DefaultCertificateSelector(hello *tls.ClientHelloInfo, choices []Certificate) (Certificate, error) {
|
|
if len(choices) == 0 {
|
|
return Certificate{}, fmt.Errorf("no certificates available")
|
|
}
|
|
now := time.Now()
|
|
best := choices[0]
|
|
for _, choice := range choices {
|
|
if err := hello.SupportsCertificate(&choice.Certificate); err != nil {
|
|
continue
|
|
}
|
|
best = choice // at least the client supports it...
|
|
if now.After(choice.Leaf.NotBefore) && now.Before(expiresAt(choice.Leaf)) {
|
|
return choice, nil // ...and unexpired, great! "Certificate, I choose you!"
|
|
}
|
|
}
|
|
return best, nil // all matching certs are expired or incompatible, oh well
|
|
}
|
|
|
|
// getCertDuringHandshake will get a certificate for hello. It first tries
|
|
// the in-memory cache. If no exact certificate for hello is in the cache, the
|
|
// config most closely corresponding to hello (like a wildcard) will be loaded.
|
|
// If none could be matched from the cache, it invokes the configured certificate
|
|
// managers to get a certificate and uses the first one that returns a certificate.
|
|
// If no certificate managers return a value, and if the config allows it
|
|
// (OnDemand!=nil) and if loadIfNecessary == true, it goes to storage to load the
|
|
// cert into the cache and serve it. If it's not on disk and if
|
|
// obtainIfNecessary == true, the certificate will be obtained from the CA, cached,
|
|
// and served. If obtainIfNecessary == true, then loadIfNecessary must also be == true.
|
|
// An error will be returned if and only if no certificate is available.
|
|
//
|
|
// This function is safe for concurrent use.
|
|
func (cfg *Config) getCertDuringHandshake(ctx context.Context, hello *tls.ClientHelloInfo, loadOrObtainIfNecessary bool) (Certificate, error) {
|
|
logger := logWithRemote(cfg.Logger.Named("handshake"), hello)
|
|
|
|
// First check our in-memory cache to see if we've already loaded it
|
|
cert, matched, defaulted := cfg.getCertificateFromCache(hello)
|
|
if matched {
|
|
logger.Debug("matched certificate in cache",
|
|
zap.Strings("subjects", cert.Names),
|
|
zap.Bool("managed", cert.managed),
|
|
zap.Time("expiration", expiresAt(cert.Leaf)),
|
|
zap.String("hash", cert.hash))
|
|
if cert.managed && cfg.OnDemand != nil && loadOrObtainIfNecessary {
|
|
// On-demand certificates are maintained in the background, but
|
|
// maintenance is triggered by handshakes instead of by a timer
|
|
// as in maintain.go.
|
|
return cfg.optionalMaintenance(ctx, cfg.Logger.Named("on_demand"), cert, hello)
|
|
}
|
|
return cert, nil
|
|
}
|
|
|
|
name := cfg.getNameFromClientHello(hello)
|
|
|
|
// By this point, we need to load or obtain a certificate. If a swarm of requests comes in for the same
|
|
// domain, avoid pounding manager or storage thousands of times simultaneously. We use a similar sync
|
|
// strategy for obtaining certificate during handshake.
|
|
certLoadWaitChansMu.Lock()
|
|
wait, ok := certLoadWaitChans[name]
|
|
if ok {
|
|
// another goroutine is already loading the cert; just wait and we'll get it from the in-memory cache
|
|
certLoadWaitChansMu.Unlock()
|
|
|
|
timeout := time.NewTimer(2 * time.Minute)
|
|
select {
|
|
case <-timeout.C:
|
|
return Certificate{}, fmt.Errorf("timed out waiting to load certificate for %s", name)
|
|
case <-ctx.Done():
|
|
timeout.Stop()
|
|
return Certificate{}, ctx.Err()
|
|
case <-wait:
|
|
timeout.Stop()
|
|
}
|
|
|
|
return cfg.getCertDuringHandshake(ctx, hello, false)
|
|
} else {
|
|
// no other goroutine is currently trying to load this cert
|
|
wait = make(chan struct{})
|
|
certLoadWaitChans[name] = wait
|
|
certLoadWaitChansMu.Unlock()
|
|
|
|
// unblock others and clean up when we're done
|
|
defer func() {
|
|
certLoadWaitChansMu.Lock()
|
|
close(wait)
|
|
delete(certLoadWaitChans, name)
|
|
certLoadWaitChansMu.Unlock()
|
|
}()
|
|
}
|
|
|
|
// If an external Manager is configured, try to get it from them.
|
|
// Only continue to use our own logic if it returns empty+nil.
|
|
externalCert, err := cfg.getCertFromAnyCertManager(ctx, hello, logger)
|
|
if err != nil {
|
|
return Certificate{}, err
|
|
}
|
|
if !externalCert.Empty() {
|
|
return externalCert, nil
|
|
}
|
|
|
|
// Make sure a certificate is allowed for the given name. If not, it doesn't make sense
|
|
// to try loading one from storage (issue #185) or obtaining one from an issuer.
|
|
if err := cfg.checkIfCertShouldBeObtained(ctx, name, false); err != nil {
|
|
return Certificate{}, fmt.Errorf("certificate is not allowed for server name %s: %w", name, err)
|
|
}
|
|
|
|
// We might be able to load or obtain a needed certificate. Load from
|
|
// storage if OnDemand is enabled, or if there is the possibility that
|
|
// a statically-managed cert was evicted from a full cache.
|
|
cfg.certCache.mu.RLock()
|
|
cacheSize := len(cfg.certCache.cache)
|
|
cfg.certCache.mu.RUnlock()
|
|
|
|
// A cert might have still been evicted from the cache even if the cache
|
|
// is no longer completely full; this happens if the newly-loaded cert is
|
|
// itself evicted (perhaps due to being expired or unmanaged at this point).
|
|
// Hence, we use an "almost full" metric to allow for the cache to not be
|
|
// perfectly full while still being able to load needed certs from storage.
|
|
// See https://caddy.community/t/error-tls-alert-internal-error-592-again/13272
|
|
// and caddyserver/caddy#4320.
|
|
cfg.certCache.optionsMu.RLock()
|
|
cacheCapacity := float64(cfg.certCache.options.Capacity)
|
|
cfg.certCache.optionsMu.RUnlock()
|
|
cacheAlmostFull := cacheCapacity > 0 && float64(cacheSize) >= cacheCapacity*.9
|
|
loadDynamically := cfg.OnDemand != nil || cacheAlmostFull
|
|
|
|
if loadDynamically && loadOrObtainIfNecessary {
|
|
// Check to see if we have one on disk
|
|
loadedCert, err := cfg.loadCertFromStorage(ctx, logger, hello)
|
|
if err == nil {
|
|
return loadedCert, nil
|
|
}
|
|
logger.Debug("did not load cert from storage",
|
|
zap.String("server_name", hello.ServerName),
|
|
zap.Error(err))
|
|
if cfg.OnDemand != nil {
|
|
// By this point, we need to ask the CA for a certificate
|
|
return cfg.obtainOnDemandCertificate(ctx, hello)
|
|
}
|
|
return loadedCert, nil
|
|
}
|
|
|
|
// Fall back to another certificate if there is one (either DefaultServerName or FallbackServerName)
|
|
if defaulted {
|
|
logger.Debug("fell back to default certificate",
|
|
zap.Strings("subjects", cert.Names),
|
|
zap.Bool("managed", cert.managed),
|
|
zap.Time("expiration", expiresAt(cert.Leaf)),
|
|
zap.String("hash", cert.hash))
|
|
return cert, nil
|
|
}
|
|
|
|
logger.Debug("no certificate matching TLS ClientHello",
|
|
zap.String("server_name", hello.ServerName),
|
|
zap.String("remote", hello.Conn.RemoteAddr().String()),
|
|
zap.String("identifier", name),
|
|
zap.Uint16s("cipher_suites", hello.CipherSuites),
|
|
zap.Float64("cert_cache_fill", float64(cacheSize)/cacheCapacity), // may be approximate! because we are not within the lock
|
|
zap.Bool("load_or_obtain_if_necessary", loadOrObtainIfNecessary),
|
|
zap.Bool("on_demand", cfg.OnDemand != nil))
|
|
|
|
return Certificate{}, fmt.Errorf("no certificate available for '%s'", name)
|
|
}
|
|
|
|
// loadCertFromStorage loads the certificate for name from storage and maintains it
|
|
// (as this is only called with on-demand TLS enabled).
|
|
func (cfg *Config) loadCertFromStorage(ctx context.Context, logger *zap.Logger, hello *tls.ClientHelloInfo) (Certificate, error) {
|
|
name := cfg.getNameFromClientHello(hello)
|
|
loadedCert, err := cfg.CacheManagedCertificate(ctx, name)
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
// If no exact match, try a wildcard variant, which is something we can still use
|
|
labels := strings.Split(name, ".")
|
|
labels[0] = "*"
|
|
loadedCert, err = cfg.CacheManagedCertificate(ctx, strings.Join(labels, "."))
|
|
}
|
|
if err != nil {
|
|
return Certificate{}, fmt.Errorf("no matching certificate to load for %s: %w", name, err)
|
|
}
|
|
logger.Debug("loaded certificate from storage",
|
|
zap.Strings("subjects", loadedCert.Names),
|
|
zap.Bool("managed", loadedCert.managed),
|
|
zap.Time("expiration", expiresAt(loadedCert.Leaf)),
|
|
zap.String("hash", loadedCert.hash))
|
|
loadedCert, err = cfg.handshakeMaintenance(ctx, hello, loadedCert)
|
|
if err != nil {
|
|
logger.Error("maintaining newly-loaded certificate",
|
|
zap.String("server_name", name),
|
|
zap.Error(err))
|
|
}
|
|
return loadedCert, nil
|
|
}
|
|
|
|
// optionalMaintenance will perform maintenance on the certificate (if necessary) and
|
|
// will return the resulting certificate. This should only be done if the certificate
|
|
// is managed, OnDemand is enabled, and the scope is allowed to obtain certificates.
|
|
func (cfg *Config) optionalMaintenance(ctx context.Context, log *zap.Logger, cert Certificate, hello *tls.ClientHelloInfo) (Certificate, error) {
|
|
newCert, err := cfg.handshakeMaintenance(ctx, hello, cert)
|
|
if err == nil {
|
|
return newCert, nil
|
|
}
|
|
|
|
log.Error("renewing certificate on-demand failed",
|
|
zap.Strings("subjects", cert.Names),
|
|
zap.Time("not_after", expiresAt(cert.Leaf)),
|
|
zap.Error(err))
|
|
|
|
if cert.Expired() {
|
|
return cert, err
|
|
}
|
|
|
|
// still has time remaining, so serve it anyway
|
|
return cert, nil
|
|
}
|
|
|
|
// checkIfCertShouldBeObtained checks to see if an on-demand TLS certificate
|
|
// should be obtained for a given domain based upon the config settings. If
|
|
// a non-nil error is returned, do not issue a new certificate for name.
|
|
func (cfg *Config) checkIfCertShouldBeObtained(ctx context.Context, name string, requireOnDemand bool) error {
|
|
if requireOnDemand && cfg.OnDemand == nil {
|
|
return fmt.Errorf("not configured for on-demand certificate issuance")
|
|
}
|
|
if !SubjectQualifiesForCert(name) {
|
|
return fmt.Errorf("subject name does not qualify for certificate: %s", name)
|
|
}
|
|
if cfg.OnDemand != nil {
|
|
if cfg.OnDemand.DecisionFunc != nil {
|
|
if err := cfg.OnDemand.DecisionFunc(ctx, name); err != nil {
|
|
return fmt.Errorf("decision func: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
if len(cfg.OnDemand.hostAllowlist) > 0 {
|
|
if _, ok := cfg.OnDemand.hostAllowlist[name]; !ok {
|
|
return fmt.Errorf("certificate for '%s' is not managed", name)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// obtainOnDemandCertificate obtains a certificate for hello.
|
|
// If another goroutine has already started obtaining a cert for
|
|
// hello, it will wait and use what the other goroutine obtained.
|
|
//
|
|
// This function is safe for use by multiple concurrent goroutines.
|
|
func (cfg *Config) obtainOnDemandCertificate(ctx context.Context, hello *tls.ClientHelloInfo) (Certificate, error) {
|
|
log := logWithRemote(cfg.Logger.Named("on_demand"), hello)
|
|
|
|
name := cfg.getNameFromClientHello(hello)
|
|
|
|
// We must protect this process from happening concurrently, so synchronize.
|
|
obtainCertWaitChansMu.Lock()
|
|
wait, ok := obtainCertWaitChans[name]
|
|
if ok {
|
|
// lucky us -- another goroutine is already obtaining the certificate.
|
|
// wait for it to finish obtaining the cert and then we'll use it.
|
|
obtainCertWaitChansMu.Unlock()
|
|
|
|
log.Debug("new certificate is needed, but is already being obtained; waiting for that issuance to complete",
|
|
zap.String("subject", name))
|
|
|
|
// TODO: see if we can get a proper context in here, for true cancellation
|
|
timeout := time.NewTimer(2 * time.Minute)
|
|
select {
|
|
case <-timeout.C:
|
|
return Certificate{}, fmt.Errorf("timed out waiting to obtain certificate for %s", name)
|
|
case <-wait:
|
|
timeout.Stop()
|
|
}
|
|
|
|
// it should now be loaded in the cache, ready to go; if not,
|
|
// the goroutine in charge of that probably had an error
|
|
return cfg.getCertDuringHandshake(ctx, hello, false)
|
|
}
|
|
|
|
// looks like it's up to us to do all the work and obtain the cert.
|
|
// make a chan others can wait on if needed
|
|
wait = make(chan struct{})
|
|
obtainCertWaitChans[name] = wait
|
|
obtainCertWaitChansMu.Unlock()
|
|
|
|
unblockWaiters := func() {
|
|
obtainCertWaitChansMu.Lock()
|
|
close(wait)
|
|
delete(obtainCertWaitChans, name)
|
|
obtainCertWaitChansMu.Unlock()
|
|
}
|
|
|
|
log.Info("obtaining new certificate", zap.String("server_name", name))
|
|
|
|
// set a timeout so we don't inadvertently hold a client handshake open too long
|
|
// (timeout duration is based on https://caddy.community/t/zerossl-dns-challenge-failing-often-route53-plugin/13822/24?u=matt)
|
|
var cancel context.CancelFunc
|
|
ctx, cancel = context.WithTimeout(ctx, 180*time.Second)
|
|
defer cancel()
|
|
|
|
// obtain the certificate (this puts it in storage) and if successful,
|
|
// load it from storage so we and any other waiting goroutine can use it
|
|
var cert Certificate
|
|
err := cfg.ObtainCertAsync(ctx, name)
|
|
if err == nil {
|
|
// load from storage while others wait to make the op as atomic as possible
|
|
cert, err = cfg.loadCertFromStorage(ctx, log, hello)
|
|
if err != nil {
|
|
log.Error("loading newly-obtained certificate from storage", zap.String("server_name", name), zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// immediately unblock anyone waiting for it
|
|
unblockWaiters()
|
|
|
|
return cert, err
|
|
}
|
|
|
|
// handshakeMaintenance performs a check on cert for expiration and OCSP validity.
|
|
// If necessary, it will renew the certificate and/or refresh the OCSP staple.
|
|
// OCSP stapling errors are not returned, only logged.
|
|
//
|
|
// This function is safe for use by multiple concurrent goroutines.
|
|
func (cfg *Config) handshakeMaintenance(ctx context.Context, hello *tls.ClientHelloInfo, cert Certificate) (Certificate, error) {
|
|
logger := cfg.Logger.Named("on_demand")
|
|
|
|
// Check OCSP staple validity
|
|
if cert.ocsp != nil && !freshOCSP(cert.ocsp) {
|
|
logger.Debug("OCSP response needs refreshing",
|
|
zap.Strings("identifiers", cert.Names),
|
|
zap.Int("ocsp_status", cert.ocsp.Status),
|
|
zap.Time("this_update", cert.ocsp.ThisUpdate),
|
|
zap.Time("next_update", cert.ocsp.NextUpdate))
|
|
|
|
err := stapleOCSP(ctx, cfg.OCSP, cfg.Storage, &cert, nil)
|
|
if err != nil {
|
|
// An error with OCSP stapling is not the end of the world, and in fact, is
|
|
// quite common considering not all certs have issuer URLs that support it.
|
|
logger.Warn("stapling OCSP",
|
|
zap.String("server_name", hello.ServerName),
|
|
zap.Strings("sans", cert.Names),
|
|
zap.Error(err))
|
|
} else {
|
|
logger.Debug("successfully stapled new OCSP response",
|
|
zap.Strings("identifiers", cert.Names),
|
|
zap.Int("ocsp_status", cert.ocsp.Status),
|
|
zap.Time("this_update", cert.ocsp.ThisUpdate),
|
|
zap.Time("next_update", cert.ocsp.NextUpdate))
|
|
}
|
|
|
|
// our copy of cert has the new OCSP staple, so replace it in the cache
|
|
cfg.certCache.mu.Lock()
|
|
cfg.certCache.cache[cert.hash] = cert
|
|
cfg.certCache.mu.Unlock()
|
|
}
|
|
|
|
// Check ARI status
|
|
if !cfg.DisableARI && cert.ari.NeedsRefresh() {
|
|
// we ignore the second return value here because we go on to check renewal status below regardless
|
|
var err error
|
|
cert, _, err = cfg.updateARI(ctx, cert, logger)
|
|
if err != nil {
|
|
logger.Error("updated ARI", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// We attempt to replace any certificates that were revoked.
|
|
// Crucially, this happens OUTSIDE a lock on the certCache.
|
|
if certShouldBeForceRenewed(cert) {
|
|
logger.Warn("on-demand certificate's OCSP status is REVOKED; will try to forcefully renew",
|
|
zap.Strings("identifiers", cert.Names),
|
|
zap.Int("ocsp_status", cert.ocsp.Status),
|
|
zap.Time("revoked_at", cert.ocsp.RevokedAt),
|
|
zap.Time("this_update", cert.ocsp.ThisUpdate),
|
|
zap.Time("next_update", cert.ocsp.NextUpdate))
|
|
return cfg.renewDynamicCertificate(ctx, hello, cert)
|
|
}
|
|
|
|
// Check cert expiration
|
|
if cfg.certNeedsRenewal(cert.Leaf, cert.ari, true) {
|
|
// Check if the certificate still exists on disk. If not, we need to obtain a new one.
|
|
// This can happen if the certificate was cleaned up by the storage cleaner, but still
|
|
// remains in the in-memory cache.
|
|
if !cfg.storageHasCertResourcesAnyIssuer(ctx, cert.Names[0]) {
|
|
logger.Debug("certificate not found on disk; obtaining new certificate",
|
|
zap.Strings("identifiers", cert.Names))
|
|
return cfg.obtainOnDemandCertificate(ctx, hello)
|
|
}
|
|
// Otherwise, renew the certificate.
|
|
return cfg.renewDynamicCertificate(ctx, hello, cert)
|
|
}
|
|
|
|
return cert, nil
|
|
}
|
|
|
|
// renewDynamicCertificate renews the certificate for name using cfg. It returns the
|
|
// certificate to use and an error, if any. name should already be lower-cased before
|
|
// calling this function. name is the name obtained directly from the handshake's
|
|
// ClientHello. If the certificate hasn't yet expired, currentCert will be returned
|
|
// and the renewal will happen in the background; otherwise this blocks until the
|
|
// certificate has been renewed, and returns the renewed certificate.
|
|
//
|
|
// If the certificate's OCSP status (currentCert.ocsp) is Revoked, it will be forcefully
|
|
// renewed even if it is not expiring.
|
|
//
|
|
// This function is safe for use by multiple concurrent goroutines.
|
|
func (cfg *Config) renewDynamicCertificate(ctx context.Context, hello *tls.ClientHelloInfo, currentCert Certificate) (Certificate, error) {
|
|
logger := logWithRemote(cfg.Logger.Named("on_demand"), hello)
|
|
|
|
name := cfg.getNameFromClientHello(hello)
|
|
timeLeft := time.Until(expiresAt(currentCert.Leaf))
|
|
revoked := currentCert.ocsp != nil && currentCert.ocsp.Status == ocsp.Revoked
|
|
|
|
// see if another goroutine is already working on this certificate
|
|
obtainCertWaitChansMu.Lock()
|
|
wait, ok := obtainCertWaitChans[name]
|
|
if ok {
|
|
// lucky us -- another goroutine is already renewing the certificate
|
|
obtainCertWaitChansMu.Unlock()
|
|
|
|
// the current certificate hasn't expired, and another goroutine is already
|
|
// renewing it, so we might as well serve what we have without blocking, UNLESS
|
|
// we're forcing renewal, in which case the current certificate is not usable
|
|
if timeLeft > 0 && !revoked {
|
|
logger.Debug("certificate expires soon but is already being renewed; serving current certificate",
|
|
zap.Strings("subjects", currentCert.Names),
|
|
zap.Duration("remaining", timeLeft))
|
|
return currentCert, nil
|
|
}
|
|
|
|
// otherwise, we'll have to wait for the renewal to finish so we don't serve
|
|
// a revoked or expired certificate
|
|
|
|
logger.Debug("certificate has expired, but is already being renewed; waiting for renewal to complete",
|
|
zap.Strings("subjects", currentCert.Names),
|
|
zap.Time("expired", expiresAt(currentCert.Leaf)),
|
|
zap.Bool("revoked", revoked))
|
|
|
|
// TODO: see if we can get a proper context in here, for true cancellation
|
|
timeout := time.NewTimer(2 * time.Minute)
|
|
select {
|
|
case <-timeout.C:
|
|
return Certificate{}, fmt.Errorf("timed out waiting for certificate renewal of %s", name)
|
|
case <-wait:
|
|
timeout.Stop()
|
|
}
|
|
|
|
// it should now be loaded in the cache, ready to go; if not,
|
|
// the goroutine in charge of that probably had an error
|
|
return cfg.getCertDuringHandshake(ctx, hello, false)
|
|
}
|
|
|
|
// looks like it's up to us to do all the work and renew the cert
|
|
wait = make(chan struct{})
|
|
obtainCertWaitChans[name] = wait
|
|
obtainCertWaitChansMu.Unlock()
|
|
|
|
unblockWaiters := func() {
|
|
obtainCertWaitChansMu.Lock()
|
|
close(wait)
|
|
delete(obtainCertWaitChans, name)
|
|
obtainCertWaitChansMu.Unlock()
|
|
}
|
|
|
|
logger = logger.With(
|
|
zap.String("server_name", name),
|
|
zap.Strings("subjects", currentCert.Names),
|
|
zap.Time("expiration", expiresAt(currentCert.Leaf)),
|
|
zap.Duration("remaining", timeLeft),
|
|
zap.Bool("revoked", revoked),
|
|
)
|
|
|
|
// Renew and reload the certificate
|
|
renewAndReload := func(ctx context.Context, cancel context.CancelFunc) (Certificate, error) {
|
|
defer cancel()
|
|
|
|
// Make sure a certificate for this name should be renewed on-demand
|
|
err := cfg.checkIfCertShouldBeObtained(ctx, name, true)
|
|
if err != nil {
|
|
// if not, remove from cache (it will be deleted from storage later)
|
|
cfg.certCache.mu.Lock()
|
|
cfg.certCache.removeCertificate(currentCert)
|
|
cfg.certCache.mu.Unlock()
|
|
unblockWaiters()
|
|
|
|
if logger != nil {
|
|
logger.Error("certificate should not be obtained", zap.Error(err))
|
|
}
|
|
|
|
return Certificate{}, err
|
|
}
|
|
|
|
logger.Info("attempting certificate renewal")
|
|
|
|
// otherwise, renew with issuer, etc.
|
|
var newCert Certificate
|
|
if revoked {
|
|
newCert, err = cfg.forceRenew(ctx, logger, currentCert)
|
|
} else {
|
|
err = cfg.RenewCertAsync(ctx, name, false)
|
|
if err == nil {
|
|
// load from storage while in lock to make the replacement as atomic as possible
|
|
newCert, err = cfg.reloadManagedCertificate(ctx, currentCert)
|
|
}
|
|
}
|
|
|
|
// immediately unblock anyone waiting for it; doing this in
|
|
// a defer would risk deadlock because of the recursive call
|
|
// to getCertDuringHandshake below when we return!
|
|
unblockWaiters()
|
|
|
|
if err != nil {
|
|
logger.Error("renewing and reloading certificate", zap.String("server_name", name), zap.Error(err))
|
|
}
|
|
|
|
return newCert, err
|
|
}
|
|
|
|
// if the certificate hasn't expired, we can serve what we have and renew in the background
|
|
if timeLeft > 0 {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
|
go renewAndReload(ctx, cancel)
|
|
return currentCert, nil
|
|
}
|
|
|
|
// otherwise, we have to block while we renew an expired certificate
|
|
ctx, cancel := context.WithTimeout(ctx, 90*time.Second)
|
|
return renewAndReload(ctx, cancel)
|
|
}
|
|
|
|
// getCertFromAnyCertManager gets a certificate from cfg's Managers. If there are no Managers defined, this is
|
|
// a no-op that returns empty values. Otherwise, it gets a certificate for hello from the first Manager that
|
|
// returns a certificate and no error.
|
|
func (cfg *Config) getCertFromAnyCertManager(ctx context.Context, hello *tls.ClientHelloInfo, logger *zap.Logger) (Certificate, error) {
|
|
// fast path if nothing to do
|
|
if cfg.OnDemand == nil || len(cfg.OnDemand.Managers) == 0 {
|
|
return Certificate{}, nil
|
|
}
|
|
|
|
// try all the GetCertificate methods on external managers; use first one that returns a certificate
|
|
var upstreamCert *tls.Certificate
|
|
var err error
|
|
for i, certManager := range cfg.OnDemand.Managers {
|
|
upstreamCert, err = certManager.GetCertificate(ctx, hello)
|
|
if err != nil {
|
|
logger.Error("external certificate manager",
|
|
zap.String("sni", hello.ServerName),
|
|
zap.String("cert_manager", fmt.Sprintf("%T", certManager)),
|
|
zap.Int("cert_manager_idx", i),
|
|
zap.Error(err))
|
|
continue
|
|
}
|
|
if upstreamCert != nil {
|
|
break
|
|
}
|
|
}
|
|
if err != nil {
|
|
return Certificate{}, fmt.Errorf("external certificate manager indicated that it is unable to yield certificate: %v", err)
|
|
}
|
|
if upstreamCert == nil {
|
|
logger.Debug("all external certificate managers yielded no certificates and no errors", zap.String("sni", hello.ServerName))
|
|
return Certificate{}, nil
|
|
}
|
|
|
|
var cert Certificate
|
|
if err = fillCertFromLeaf(&cert, *upstreamCert); err != nil {
|
|
return Certificate{}, fmt.Errorf("external certificate manager: %s: filling cert from leaf: %v", hello.ServerName, err)
|
|
}
|
|
|
|
logger.Debug("using externally-managed certificate",
|
|
zap.String("sni", hello.ServerName),
|
|
zap.Strings("names", cert.Names),
|
|
zap.Time("expiration", expiresAt(cert.Leaf)))
|
|
|
|
return cert, nil
|
|
}
|
|
|
|
// getTLSALPNChallengeCert is to be called when the clientHello pertains to
|
|
// a TLS-ALPN challenge and a certificate is required to solve it. This method gets
|
|
// the relevant challenge info and then returns the associated certificate (if any)
|
|
// or generates it anew if it's not available (as is the case when distributed
|
|
// solving). True is returned if the challenge is being solved distributed (there
|
|
// is no semantic difference with distributed solving; it is mainly for logging).
|
|
func (cfg *Config) getTLSALPNChallengeCert(clientHello *tls.ClientHelloInfo) (*tls.Certificate, bool, error) {
|
|
chalData, distributed, err := cfg.getChallengeInfo(clientHello.Context(), clientHello.ServerName)
|
|
if err != nil {
|
|
return nil, distributed, err
|
|
}
|
|
|
|
// fast path: we already created the certificate (this avoids having to re-create
|
|
// it at every handshake that tries to verify, e.g. multi-perspective validation)
|
|
if chalData.data != nil {
|
|
return chalData.data.(*tls.Certificate), distributed, nil
|
|
}
|
|
|
|
// otherwise, we can re-create the solution certificate, but it takes a few cycles
|
|
cert, err := acmez.TLSALPN01ChallengeCert(chalData.Challenge)
|
|
if err != nil {
|
|
return nil, distributed, fmt.Errorf("making TLS-ALPN challenge certificate: %v", err)
|
|
}
|
|
if cert == nil {
|
|
return nil, distributed, fmt.Errorf("got nil TLS-ALPN challenge certificate but no error")
|
|
}
|
|
|
|
return cert, distributed, nil
|
|
}
|
|
|
|
// getNameFromClientHello returns a normalized form of hello.ServerName.
|
|
// If hello.ServerName is empty (i.e. client did not use SNI), then the
|
|
// associated connection's local address is used to extract an IP address.
|
|
func (cfg *Config) getNameFromClientHello(hello *tls.ClientHelloInfo) string {
|
|
if name := normalizedName(hello.ServerName); name != "" {
|
|
return name
|
|
}
|
|
if cfg.DefaultServerName != "" {
|
|
return normalizedName(cfg.DefaultServerName)
|
|
}
|
|
return localIPFromConn(hello.Conn)
|
|
}
|
|
|
|
// logWithRemote adds the remote host and port to the logger.
|
|
func logWithRemote(l *zap.Logger, hello *tls.ClientHelloInfo) *zap.Logger {
|
|
if hello.Conn == nil || l == nil {
|
|
return l
|
|
}
|
|
addr := hello.Conn.RemoteAddr().String()
|
|
ip, port, err := net.SplitHostPort(addr)
|
|
if err != nil {
|
|
ip = addr
|
|
port = ""
|
|
}
|
|
return l.With(zap.String("remote_ip", ip), zap.String("remote_port", port))
|
|
}
|
|
|
|
// localIPFromConn extracts the host part of c's local address, without the
// port and without any IPv6 scope ID (see RFC 4007). It returns the empty
// string if c is nil.
func localIPFromConn(c net.Conn) string {
	if c == nil {
		return ""
	}

	addr := c.LocalAddr().String()
	host, _, err := net.SplitHostPort(addr)
	if err != nil {
		// OK; assume there was no port and the whole address is the host
		host = addr
	}

	// IPv6 addresses can have scope IDs, e.g. "fe80::4c3:3cff:fe4f:7e0b%eth0",
	// but for our purposes, these are useless (unless a valid use case proves
	// otherwise; see issue #3911), so keep only what precedes the first "%"
	host, _, _ = strings.Cut(host, "%")

	return host
}
|
|
|
|
// normalizedName produces the canonical form of serverName that is used
// whenever a SNI value is referred to: surrounding whitespace is trimmed
// and the result is lower-cased.
func normalizedName(serverName string) string {
	trimmed := strings.TrimSpace(serverName)
	return strings.ToLower(trimmed)
}
|
|
|
|
// obtainCertWaitChans is used to coordinate obtaining certs for each hostname.
// obtainCertWaitChansMu is the mutex declared alongside it (presumably it guards
// access to the map; confirm against the code that uses them).
var (
	obtainCertWaitChans   = map[string]chan struct{}{}
	obtainCertWaitChansMu sync.Mutex
)
|
|
|
|
// certLoadWaitChans maps a string key to a signal-only channel, and
// certLoadWaitChansMu is the mutex declared alongside it (judging by the
// names, they coordinate concurrent certificate loads; confirm against
// the code that uses them).
//
// TODO: this lockset should probably be per-cache
var (
	certLoadWaitChans   = map[string]chan struct{}{}
	certLoadWaitChansMu sync.Mutex
)
|
|
|
|
// serializableClientHello holds the values of a tls.ClientHelloInfo that
// clientHelloWithoutConn copies out of it. The connection itself is kept
// only in an unexported field so that it is left out when the value is
// serialized.
type serializableClientHello struct {
	CipherSuites      []uint16
	ServerName        string
	SupportedCurves   []tls.CurveID
	SupportedPoints   []uint8
	SignatureSchemes  []tls.SignatureScheme
	SupportedProtos   []string
	SupportedVersions []uint16

	// values copied from the Conn as they are still useful/needed
	RemoteAddr net.Addr
	LocalAddr  net.Addr

	// unexported so it's not serialized
	conn net.Conn
}
|
|
|
|
// clientHelloWithoutConn returns the data from the ClientHelloInfo without the
|
|
// pesky exported Conn field, which often causes an error when serializing because
|
|
// the underlying type may be unserializable.
|
|
func clientHelloWithoutConn(hello *tls.ClientHelloInfo) serializableClientHello {
|
|
if hello == nil {
|
|
return serializableClientHello{}
|
|
}
|
|
var remote, local net.Addr
|
|
if hello.Conn != nil {
|
|
remote = hello.Conn.RemoteAddr()
|
|
local = hello.Conn.LocalAddr()
|
|
}
|
|
return serializableClientHello{
|
|
CipherSuites: hello.CipherSuites,
|
|
ServerName: hello.ServerName,
|
|
SupportedCurves: hello.SupportedCurves,
|
|
SupportedPoints: hello.SupportedPoints,
|
|
SignatureSchemes: hello.SignatureSchemes,
|
|
SupportedProtos: hello.SupportedProtos,
|
|
SupportedVersions: hello.SupportedVersions,
|
|
RemoteAddr: remote,
|
|
LocalAddr: local,
|
|
conn: hello.Conn,
|
|
}
|
|
}
|
|
|
|
// helloInfoCtxKey is the dedicated type of ClientHelloInfoCtxKey.
type helloInfoCtxKey string

// ClientHelloInfoCtxKey is the key by which the ClientHelloInfo can be extracted from
// a context.Context within a DecisionFunc. However, be advised that it is best practice
// that the decision whether to obtain a certificate be based solely on the name,
// not other properties of the specific connection/client requesting the connection.
// For example, it is not advisable to use a client's IP address to decide whether to
// allow a certificate. Instead, the ClientHello can be useful for logging, etc.
const ClientHelloInfoCtxKey = helloInfoCtxKey("certmagic:ClientHelloInfo")
|