mirror of
https://github.com/SoPat712/allstarr.git
synced 2026-02-09 23:55:10 -05:00
feat: add quick health checks before trying endpoints
- Health checks run in parallel with a 3-second timeout
- Results are cached for 30 seconds to avoid excessive checks
- Healthy endpoints are tried first, unhealthy ones as fallback
- Prevents wasting time on dead endpoints (no more 5-minute waits)
- Failed requests mark the endpoint as unhealthy in the cache
- Significantly improves response time when some endpoints are down
This commit is contained in:
@@ -11,6 +11,12 @@ public class RoundRobinFallbackHelper
|
||||
private readonly object _urlIndexLock = new object();
|
||||
private readonly ILogger _logger;
|
||||
private readonly string _serviceName;
|
||||
private readonly HttpClient _healthCheckClient;
|
||||
|
||||
// Cache health check results for 30 seconds to avoid excessive checks
|
||||
private readonly Dictionary<string, (bool isHealthy, DateTime checkedAt)> _healthCache = new();
|
||||
private readonly object _healthCacheLock = new object();
|
||||
private readonly TimeSpan _healthCacheExpiry = TimeSpan.FromSeconds(30);
|
||||
|
||||
public int EndpointCount => _apiUrls.Count;
|
||||
|
||||
@@ -24,6 +30,91 @@ public class RoundRobinFallbackHelper
|
||||
{
|
||||
throw new ArgumentException("API URLs list cannot be empty", nameof(apiUrls));
|
||||
}
|
||||
|
||||
// Create a dedicated HttpClient for health checks with short timeout
|
||||
_healthCheckClient = new HttpClient
|
||||
{
|
||||
Timeout = TimeSpan.FromSeconds(3) // Quick health check timeout
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Quickly checks whether an endpoint is healthy by issuing a GET request against its
/// base URL with the dedicated health-check client.
/// The outcome (healthy or unhealthy) is cached per URL and reused until the entry is
/// older than <c>_healthCacheExpiry</c>, to avoid excessive health checks.
/// </summary>
/// <param name="baseUrl">Base URL of the endpoint to probe; also used as the cache key.</param>
/// <returns>
/// <c>true</c> when a fresh cache entry says the endpoint is healthy or the probe returns a
/// success status code; <c>false</c> on a non-success status code or when the probe throws.
/// </returns>
private async Task<bool> IsEndpointHealthyAsync(string baseUrl)
{
    // Serve from the cache while the entry is still fresh
    lock (_healthCacheLock)
    {
        if (_healthCache.TryGetValue(baseUrl, out var cached)
            && DateTime.UtcNow - cached.checkedAt < _healthCacheExpiry)
        {
            return cached.isHealthy;
        }
    }

    // Perform health check
    try
    {
        // Only the headers are read, so the body is never consumed; the response must be
        // disposed explicitly or the underlying connection stays held.
        using var response = await _healthCheckClient.GetAsync(baseUrl, HttpCompletionOption.ResponseHeadersRead);
        var isHealthy = response.IsSuccessStatusCode;

        // Cache result
        lock (_healthCacheLock)
        {
            _healthCache[baseUrl] = (isHealthy, DateTime.UtcNow);
        }

        if (!isHealthy)
        {
            _logger.LogDebug("{Service} endpoint {Endpoint} health check failed: {StatusCode}",
                _serviceName, baseUrl, response.StatusCode);
        }

        return isHealthy;
    }
    catch (Exception ex)
    {
        // Any failure (timeout, connection error, invalid URL, ...) counts as unhealthy
        _logger.LogDebug(ex, "{Service} endpoint {Endpoint} health check failed", _serviceName, baseUrl);

        // Cache as unhealthy
        lock (_healthCacheLock)
        {
            _healthCache[baseUrl] = (false, DateTime.UtcNow);
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Probes every configured endpoint concurrently and returns the URLs that report healthy,
/// in their configured order.
/// If no endpoint is healthy, the full configured list is returned so that callers can
/// still attempt all of them.
/// </summary>
/// <returns>The healthy endpoint URLs, or all configured URLs when none are healthy.</returns>
private async Task<List<string>> GetHealthyEndpointsAsync()
{
    // Pairs a URL with the outcome of its health check
    async Task<(string Url, bool IsHealthy)> ProbeAsync(string url)
    {
        return (url, await IsEndpointHealthyAsync(url));
    }

    // Start every probe before awaiting any of them so the checks run in parallel
    var pendingProbes = new List<Task<(string Url, bool IsHealthy)>>();
    foreach (var url in _apiUrls)
    {
        pendingProbes.Add(ProbeAsync(url));
    }

    var outcomes = await Task.WhenAll(pendingProbes);

    var healthy = new List<string>();
    foreach (var outcome in outcomes)
    {
        if (outcome.IsHealthy)
        {
            healthy.Add(outcome.Url);
        }
    }

    if (healthy.Count > 0)
    {
        _logger.LogDebug("{Service} health check: {Healthy}/{Total} endpoints healthy",
            _serviceName, healthy.Count, _apiUrls.Count);

        return healthy;
    }

    // Nothing passed the check: fall back to the complete list rather than failing outright
    _logger.LogWarning("{Service} health check: no healthy endpoints found, will try all", _serviceName);
    return _apiUrls;
}
|
||||
|
||||
/// <summary>
|
||||
@@ -54,10 +145,14 @@ public class RoundRobinFallbackHelper
|
||||
/// <summary>
|
||||
/// Tries the request with the next provider in round-robin, then falls back to others on failure.
|
||||
/// This distributes load evenly across all providers while maintaining reliability.
|
||||
/// Performs quick health checks first to avoid wasting time on dead endpoints.
|
||||
/// Throws exception if all endpoints fail.
|
||||
/// </summary>
|
||||
public async Task<T> TryWithFallbackAsync<T>(Func<string, Task<T>> action)
|
||||
{
|
||||
// Get healthy endpoints first (with caching to avoid excessive checks)
|
||||
var healthyEndpoints = await GetHealthyEndpointsAsync();
|
||||
|
||||
// Start with the next URL in round-robin to distribute load
|
||||
var startIndex = 0;
|
||||
lock (_urlIndexLock)
|
||||
@@ -66,16 +161,21 @@ public class RoundRobinFallbackHelper
|
||||
_currentUrlIndex = (_currentUrlIndex + 1) % _apiUrls.Count;
|
||||
}
|
||||
|
||||
// Try healthy endpoints first, then fall back to all if needed
|
||||
var endpointsToTry = healthyEndpoints.Count < _apiUrls.Count
|
||||
? healthyEndpoints.Concat(_apiUrls.Except(healthyEndpoints)).ToList()
|
||||
: healthyEndpoints;
|
||||
|
||||
// Try all URLs starting from the round-robin selected one
|
||||
for (int attempt = 0; attempt < _apiUrls.Count; attempt++)
|
||||
for (int attempt = 0; attempt < endpointsToTry.Count; attempt++)
|
||||
{
|
||||
var urlIndex = (startIndex + attempt) % _apiUrls.Count;
|
||||
var baseUrl = _apiUrls[urlIndex];
|
||||
var urlIndex = (startIndex + attempt) % endpointsToTry.Count;
|
||||
var baseUrl = endpointsToTry[urlIndex];
|
||||
|
||||
try
|
||||
{
|
||||
_logger.LogDebug("Trying {Service} endpoint {Endpoint} (attempt {Attempt}/{Total})",
|
||||
_serviceName, baseUrl, attempt + 1, _apiUrls.Count);
|
||||
_serviceName, baseUrl, attempt + 1, endpointsToTry.Count);
|
||||
return await action(baseUrl);
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -83,9 +183,15 @@ public class RoundRobinFallbackHelper
|
||||
_logger.LogWarning(ex, "{Service} request failed with endpoint {Endpoint}, trying next...",
|
||||
_serviceName, baseUrl);
|
||||
|
||||
if (attempt == _apiUrls.Count - 1)
|
||||
// Mark as unhealthy in cache
|
||||
lock (_healthCacheLock)
|
||||
{
|
||||
_logger.LogError("All {Count} {Service} endpoints failed", _apiUrls.Count, _serviceName);
|
||||
_healthCache[baseUrl] = (false, DateTime.UtcNow);
|
||||
}
|
||||
|
||||
if (attempt == endpointsToTry.Count - 1)
|
||||
{
|
||||
_logger.LogError("All {Count} {Service} endpoints failed", endpointsToTry.Count, _serviceName);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@@ -150,10 +256,14 @@ public class RoundRobinFallbackHelper
|
||||
|
||||
/// <summary>
|
||||
/// Tries the request with the next provider in round-robin, then falls back to others on failure.
|
||||
/// Performs quick health checks first to avoid wasting time on dead endpoints.
|
||||
/// Returns default value if all endpoints fail (does not throw).
|
||||
/// </summary>
|
||||
public async Task<T> TryWithFallbackAsync<T>(Func<string, Task<T>> action, T defaultValue)
|
||||
{
|
||||
// Get healthy endpoints first (with caching to avoid excessive checks)
|
||||
var healthyEndpoints = await GetHealthyEndpointsAsync();
|
||||
|
||||
// Start with the next URL in round-robin to distribute load
|
||||
var startIndex = 0;
|
||||
lock (_urlIndexLock)
|
||||
@@ -162,16 +272,21 @@ public class RoundRobinFallbackHelper
|
||||
_currentUrlIndex = (_currentUrlIndex + 1) % _apiUrls.Count;
|
||||
}
|
||||
|
||||
// Try healthy endpoints first, then fall back to all if needed
|
||||
var endpointsToTry = healthyEndpoints.Count < _apiUrls.Count
|
||||
? healthyEndpoints.Concat(_apiUrls.Except(healthyEndpoints)).ToList()
|
||||
: healthyEndpoints;
|
||||
|
||||
// Try all URLs starting from the round-robin selected one
|
||||
for (int attempt = 0; attempt < _apiUrls.Count; attempt++)
|
||||
for (int attempt = 0; attempt < endpointsToTry.Count; attempt++)
|
||||
{
|
||||
var urlIndex = (startIndex + attempt) % _apiUrls.Count;
|
||||
var baseUrl = _apiUrls[urlIndex];
|
||||
var urlIndex = (startIndex + attempt) % endpointsToTry.Count;
|
||||
var baseUrl = endpointsToTry[urlIndex];
|
||||
|
||||
try
|
||||
{
|
||||
_logger.LogDebug("Trying {Service} endpoint {Endpoint} (attempt {Attempt}/{Total})",
|
||||
_serviceName, baseUrl, attempt + 1, _apiUrls.Count);
|
||||
_serviceName, baseUrl, attempt + 1, endpointsToTry.Count);
|
||||
return await action(baseUrl);
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -179,10 +294,16 @@ public class RoundRobinFallbackHelper
|
||||
_logger.LogWarning(ex, "{Service} request failed with endpoint {Endpoint}, trying next...",
|
||||
_serviceName, baseUrl);
|
||||
|
||||
if (attempt == _apiUrls.Count - 1)
|
||||
// Mark as unhealthy in cache
|
||||
lock (_healthCacheLock)
|
||||
{
|
||||
_healthCache[baseUrl] = (false, DateTime.UtcNow);
|
||||
}
|
||||
|
||||
if (attempt == endpointsToTry.Count - 1)
|
||||
{
|
||||
_logger.LogError("All {Count} {Service} endpoints failed, returning default value",
|
||||
_apiUrls.Count, _serviceName);
|
||||
endpointsToTry.Count, _serviceName);
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user