Skip to content

Commit e844ead

Browse files
authored
Merge pull request #5 from ChangemakerStudios/claude/review-certificate-update-011CUw9RFfKdbGehxfDxVmef
Implement comprehensive improvements to certificate renewal reliability
2 parents 058f168 + 3323921 commit e844ead

17 files changed

+776
-42
lines changed

src/LettuceEncrypt/Internal/AcmeCertificateFactory.cs

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
using LettuceEncrypt.Accounts;
1010
using LettuceEncrypt.Acme;
1111
using LettuceEncrypt.Internal.PfxBuilder;
12+
using Microsoft.AspNetCore.Hosting.Server;
1213
using Microsoft.Extensions.Hosting;
1314
using Microsoft.Extensions.Logging;
1415
using Microsoft.Extensions.Options;
@@ -28,6 +29,7 @@ internal class AcmeCertificateFactory
2829
private readonly IDnsChallengeProvider _dnsChallengeProvider;
2930
private readonly ICertificateAuthorityConfiguration _certificateAuthority;
3031
private readonly IPfxBuilderFactory _pfxBuilderFactory;
32+
private readonly IServer _server;
3133
private readonly TaskCompletionSource<object?> _appStarted = new();
3234
private AcmeClient? _client;
3335
private IKey? _acmeAccountKey;
@@ -43,6 +45,7 @@ public AcmeCertificateFactory(
4345
ICertificateAuthorityConfiguration certificateAuthority,
4446
IDnsChallengeProvider dnsChallengeProvider,
4547
IPfxBuilderFactory pfxBuilderFactory,
48+
IServer server,
4649
IAccountStore? accountRepository = null)
4750
{
4851
_acmeClientFactory = acmeClientFactory;
@@ -55,6 +58,7 @@ public AcmeCertificateFactory(
5558
_dnsChallengeProvider = dnsChallengeProvider;
5659
_certificateAuthority = certificateAuthority;
5760
_pfxBuilderFactory = pfxBuilderFactory;
61+
_server = server;
5862

5963
appLifetime.ApplicationStarted.Register(() => _appStarted.TrySetResult(null));
6064
if (appLifetime.ApplicationStarted.IsCancellationRequested)
@@ -236,23 +240,27 @@ private async Task ValidateDomainOwnershipAsync(IAuthorizationContext authorizat
236240
cancellationToken.ThrowIfCancellationRequested();
237241

238242
var validators = new List<DomainOwnershipValidator>();
243+
var validationTimeout = _options.Value.ValidationTimeout;
244+
var validationPollInterval = _options.Value.ValidationPollInterval;
245+
var enableSelfTest = _options.Value.EnableChallengeSelfTest;
246+
var selfTestBaseUrl = _options.Value.ChallengeSelfTestBaseUrl;
239247

240248
if (_tlsAlpnChallengeResponder.IsEnabled)
241249
{
242250
validators.Add(new TlsAlpn01DomainValidator(
243-
_tlsAlpnChallengeResponder, _appLifetime, _client, _logger, domainName));
251+
_tlsAlpnChallengeResponder, _appLifetime, _client, _logger, domainName, validationTimeout, validationPollInterval));
244252
}
245253

246254
if (_options.Value.AllowedChallengeTypes.HasFlag(ChallengeType.Http01))
247255
{
248256
validators.Add(new Http01DomainValidator(
249-
_challengeStore, _appLifetime, _client, _logger, domainName));
257+
_challengeStore, _appLifetime, _client, _logger, domainName, validationTimeout, validationPollInterval, enableSelfTest, selfTestBaseUrl, _server));
250258
}
251259

252260
if (_options.Value.AllowedChallengeTypes.HasFlag(ChallengeType.Dns01))
253261
{
254262
validators.Add(new Dns01DomainValidator(
255-
_dnsChallengeProvider, _appLifetime, _client, _logger, domainName));
263+
_dnsChallengeProvider, _appLifetime, _client, _logger, domainName, validationTimeout, validationPollInterval));
256264
}
257265

258266
if (validators.Count == 0)
@@ -263,23 +271,67 @@ private async Task ValidateDomainOwnershipAsync(IAuthorizationContext authorizat
263271
"Ensure at least one kind of these challenge types is configured: " + challengeTypes);
264272
}
265273

274+
_logger.LogInformation(
275+
"Attempting domain validation for '{DomainName}' using {ValidatorCount} challenge method(s): {Validators}",
276+
domainName,
277+
validators.Count,
278+
string.Join(", ", validators.Select(v => v.GetType().Name.Replace("DomainValidator", ""))));
279+
280+
var failures = new List<Exception>();
281+
266282
foreach (var validator in validators)
267283
{
268284
cancellationToken.ThrowIfCancellationRequested();
285+
var validatorName = validator.GetType().Name.Replace("DomainValidator", "");
286+
269287
try
270288
{
289+
_logger.LogDebug("Trying {ValidatorName} validation for domain '{DomainName}'",
290+
validatorName, domainName);
291+
271292
await validator.ValidateOwnershipAsync(authorizationContext, cancellationToken);
293+
272294
// The method above raises if validation fails. If no exception occurs, we assume validation completed successfully.
295+
_logger.LogInformation(
296+
"Domain validation succeeded using {ValidatorName} for '{DomainName}'",
297+
validatorName, domainName);
273298
return;
274299
}
275300
catch (Exception ex)
276301
{
277-
_logger.LogDebug(ex, "Validation with {validatorType} failed with error: {error}",
278-
validator.GetType().Name, ex.Message);
302+
// Rethrow cancellation exceptions immediately to preserve cooperative cancellation semantics
303+
if (ex is OperationCanceledException || ex is TaskCanceledException)
304+
{
305+
throw;
306+
}
307+
308+
failures.Add(ex);
309+
_logger.LogWarning(ex,
310+
"Validation with {ValidatorName} failed for domain '{DomainName}'. " +
311+
"Error: {ErrorMessage}. " +
312+
"{RemainingValidators} validation method(s) remaining.",
313+
validatorName,
314+
domainName,
315+
ex.Message,
316+
validators.Count - failures.Count);
279317
}
280318
}
281319

282-
throw new InvalidOperationException($"Failed to validate ownership of domainName '{domainName}'");
320+
// All validators failed
321+
var failureDetails = string.Join("; ", failures.Select((ex, i) =>
322+
$"{validators[i].GetType().Name.Replace("DomainValidator", "")}: {ex.Message}"));
323+
324+
_logger.LogError(
325+
"All {ValidatorCount} validation methods failed for domain '{DomainName}'. Failures: {FailureDetails}",
326+
validators.Count,
327+
domainName,
328+
failureDetails);
329+
330+
throw new AggregateException(
331+
$"Failed to validate ownership of domainName '{domainName}' using any available challenge method. " +
332+
$"Attempted: {string.Join(", ", validators.Select(v => v.GetType().Name.Replace("DomainValidator", "")))}. " +
333+
$"See inner exceptions for details.",
334+
failures);
283335
}
284336

285337
private async Task<X509Certificate2> CompleteCertificateRequestAsync(IOrderContext order,

src/LettuceEncrypt/Internal/AcmeStates/BeginCertificateCreationState.cs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,24 @@ internal class BeginCertificateCreationState : AcmeState
1414
private readonly AcmeCertificateFactory _acmeCertificateFactory;
1515
private readonly CertificateSelector _selector;
1616
private readonly IEnumerable<ICertificateRepository> _certificateRepositories;
17+
private readonly RenewalFailureTracker _failureTracker;
1718

1819
public BeginCertificateCreationState(
19-
AcmeStateMachineContext context, ILogger<ServerStartupState> logger,
20-
IOptions<LettuceEncryptOptions> options, AcmeCertificateFactory acmeCertificateFactory,
21-
CertificateSelector selector, IEnumerable<ICertificateRepository> certificateRepositories)
20+
AcmeStateMachineContext context,
21+
ILogger<ServerStartupState> logger,
22+
IOptions<LettuceEncryptOptions> options,
23+
AcmeCertificateFactory acmeCertificateFactory,
24+
CertificateSelector selector,
25+
IEnumerable<ICertificateRepository> certificateRepositories,
26+
RenewalFailureTracker failureTracker)
2227
: base(context)
2328
{
2429
_logger = logger;
2530
_options = options;
2631
_acmeCertificateFactory = acmeCertificateFactory;
2732
_selector = selector;
2833
_certificateRepositories = certificateRepositories;
34+
_failureTracker = failureTracker;
2935
}
3036

3137
public override async Task<IAcmeState> MoveNextAsync(CancellationToken cancellationToken)
@@ -47,11 +53,31 @@ public override async Task<IAcmeState> MoveNextAsync(CancellationToken cancellat
4753
cert.Thumbprint);
4854

4955
await SaveCertificateAsync(cert, cancellationToken);
56+
57+
// Record success for all domains
58+
foreach (var domain in domainNames)
59+
{
60+
_failureTracker.RecordSuccess(domain);
61+
}
5062
}
5163
catch (Exception ex)
5264
{
65+
// Rethrow cancellation exceptions immediately to preserve cooperative cancellation semantics
66+
if (ex is OperationCanceledException || ex is TaskCanceledException)
67+
{
68+
throw;
69+
}
70+
71+
// Record failure for all domains
72+
foreach (var domain in domainNames)
73+
{
74+
_failureTracker.RecordFailure(domain, ex);
75+
}
76+
5377
_logger.LogError(0, ex, "Failed to automatically create a certificate for {hostname}", domainNames);
54-
throw;
78+
79+
// Don't throw - return to CheckForRenewalState to implement backoff
80+
// The exception has been logged and tracked
5581
}
5682

5783
return MoveTo<CheckForRenewalState>();

src/LettuceEncrypt/Internal/AcmeStates/CheckForRenewalState.cs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,21 @@ internal class CheckForRenewalState : AcmeState
1313
private readonly IOptions<LettuceEncryptOptions> _options;
1414
private readonly CertificateSelector _selector;
1515
private readonly IClock _clock;
16+
private readonly RenewalFailureTracker _failureTracker;
1617

1718
public CheckForRenewalState(
1819
AcmeStateMachineContext context,
1920
ILogger<CheckForRenewalState> logger,
2021
IOptions<LettuceEncryptOptions> options,
2122
CertificateSelector selector,
22-
IClock clock) : base(context)
23+
IClock clock,
24+
RenewalFailureTracker failureTracker) : base(context)
2325
{
2426
_logger = logger;
2527
_options = options;
2628
_selector = selector;
2729
_clock = clock;
30+
_failureTracker = failureTracker;
2831
}
2932

3033
public override async Task<IAcmeState> MoveNextAsync(CancellationToken cancellationToken)
@@ -53,6 +56,24 @@ public override async Task<IAcmeState> MoveNextAsync(CancellationToken cancellat
5356
|| cert == null
5457
|| cert.NotAfter <= _clock.Now.DateTime + daysInAdvance.Value)
5558
{
59+
var certExpiration = cert?.NotAfter ?? _clock.Now.DateTime;
60+
61+
// Check backoff policy before attempting renewal
62+
if (!_failureTracker.ShouldAttemptRenewal(domainName, certExpiration))
63+
{
64+
_logger.LogDebug(
65+
"Skipping renewal for '{DomainName}' due to backoff policy. {FailureInfo}",
66+
domainName,
67+
_failureTracker.GetFailureInfo(domainName));
68+
continue;
69+
}
70+
71+
_logger.LogInformation(
72+
"Certificate renewal needed for '{DomainName}'. Expiration: {Expiration}, Days until expiry: {DaysUntilExpiry:F1}",
73+
domainName,
74+
certExpiration,
75+
(certExpiration - _clock.Now.DateTime).TotalDays);
76+
5677
return MoveTo<BeginCertificateCreationState>();
5778
}
5879
}

src/LettuceEncrypt/Internal/Dns01DomainValidator.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ public Dns01DomainValidator(
1919
IHostApplicationLifetime appLifetime,
2020
AcmeClient client,
2121
ILogger logger,
22-
string domainName
23-
) : base(appLifetime, client, logger, domainName)
22+
string domainName,
23+
TimeSpan validationTimeout,
24+
TimeSpan validationPollInterval
25+
) : base(appLifetime, client, logger, domainName, validationTimeout, validationPollInterval)
2426
{
2527
_dnsChallengeProvider = dnsChallengeProvider;
2628
}

src/LettuceEncrypt/Internal/DomainOwnershipValidator.cs

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,22 @@ internal abstract class DomainOwnershipValidator
1414
protected readonly ILogger _logger;
1515
protected readonly string _domainName;
1616
protected readonly TaskCompletionSource<object?> _appStarted = new();
17-
18-
protected DomainOwnershipValidator(IHostApplicationLifetime appLifetime, AcmeClient client, ILogger logger, string domainName)
17+
protected readonly TimeSpan _validationTimeout;
18+
protected readonly TimeSpan _validationPollInterval;
19+
20+
protected DomainOwnershipValidator(
21+
IHostApplicationLifetime appLifetime,
22+
AcmeClient client,
23+
ILogger logger,
24+
string domainName,
25+
TimeSpan validationTimeout,
26+
TimeSpan validationPollInterval)
1927
{
2028
_client = client;
2129
_logger = logger;
2230
_domainName = domainName;
31+
_validationTimeout = validationTimeout;
32+
_validationPollInterval = validationPollInterval;
2333

2434
appLifetime.ApplicationStarted.Register(() => _appStarted.TrySetResult(null));
2535
if (appLifetime.ApplicationStarted.IsCancellationRequested)
@@ -32,25 +42,37 @@ protected DomainOwnershipValidator(IHostApplicationLifetime appLifetime, AcmeCli
3242

3343
protected async Task WaitForChallengeResultAsync(IAuthorizationContext authorizationContext, CancellationToken cancellationToken)
3444
{
35-
var retries = 60;
36-
var delay = TimeSpan.FromSeconds(2);
45+
var startTime = DateTimeOffset.UtcNow;
46+
var attempt = 0;
3747

38-
while (retries > 0)
48+
while (true)
3949
{
40-
retries--;
50+
attempt++;
51+
var elapsed = DateTimeOffset.UtcNow - startTime;
52+
53+
if (elapsed >= _validationTimeout)
54+
{
55+
throw new TimeoutException(
56+
$"Timed out after {elapsed.TotalSeconds:F1} seconds waiting for domain ownership validation of '{_domainName}'. " +
57+
$"Made {attempt} attempts. Consider increasing ValidationTimeout in LettuceEncryptOptions.");
58+
}
4159

4260
cancellationToken.ThrowIfCancellationRequested();
4361

4462
var authorization = await _client.GetAuthorizationAsync(authorizationContext);
4563

4664
_logger.LogAcmeAction("GetAuthorization");
65+
_logger.LogTrace("Validation attempt {Attempt} for domain '{DomainName}': status = {Status}, elapsed = {Elapsed:F1}s",
66+
attempt, _domainName, authorization.Status, elapsed.TotalSeconds);
4767

4868
switch (authorization.Status)
4969
{
5070
case AuthorizationStatus.Valid:
71+
_logger.LogInformation("Domain '{DomainName}' validated successfully after {Attempts} attempts in {Elapsed:F1}s",
72+
_domainName, attempt, elapsed.TotalSeconds);
5173
return;
5274
case AuthorizationStatus.Pending:
53-
await Task.Delay(delay, cancellationToken);
75+
await Task.Delay(_validationPollInterval, cancellationToken);
5476
continue;
5577
case AuthorizationStatus.Invalid:
5678
throw InvalidAuthorizationError(authorization);
@@ -66,8 +88,6 @@ protected async Task WaitForChallengeResultAsync(IAuthorizationContext authoriza
6688
"Unexpected response from server while validating domain ownership.");
6789
}
6890
}
69-
70-
throw new TimeoutException("Timed out waiting for domain ownership validation.");
7191
}
7292

7393
private Exception InvalidAuthorizationError(Authorization authorization)

0 commit comments

Comments
 (0)