mirror of
https://github.com/SoPat712/allstarr.git
synced 2026-02-09 23:55:10 -05:00
feat: add quick health checks before trying endpoints
- Health checks run in parallel with a 3-second timeout
- Results are cached for 30 seconds to avoid excessive checks
- Healthy endpoints are tried first, unhealthy ones as fallback
- Prevents wasting time on dead endpoints (no more 5-minute waits)
- Failed requests mark the endpoint as unhealthy in the cache
- Significantly improves response time when some endpoints are down
This commit is contained in:
@@ -11,6 +11,12 @@ public class RoundRobinFallbackHelper
|
|||||||
private readonly object _urlIndexLock = new object();
|
private readonly object _urlIndexLock = new object();
|
||||||
private readonly ILogger _logger;
|
private readonly ILogger _logger;
|
||||||
private readonly string _serviceName;
|
private readonly string _serviceName;
|
||||||
|
private readonly HttpClient _healthCheckClient;
|
||||||
|
|
||||||
|
// Cache health check results for 30 seconds to avoid excessive checks
|
||||||
|
private readonly Dictionary<string, (bool isHealthy, DateTime checkedAt)> _healthCache = new();
|
||||||
|
private readonly object _healthCacheLock = new object();
|
||||||
|
private readonly TimeSpan _healthCacheExpiry = TimeSpan.FromSeconds(30);
|
||||||
|
|
||||||
public int EndpointCount => _apiUrls.Count;
|
public int EndpointCount => _apiUrls.Count;
|
||||||
|
|
||||||
@@ -24,6 +30,91 @@ public class RoundRobinFallbackHelper
|
|||||||
{
|
{
|
||||||
throw new ArgumentException("API URLs list cannot be empty", nameof(apiUrls));
|
throw new ArgumentException("API URLs list cannot be empty", nameof(apiUrls));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a dedicated HttpClient for health checks with short timeout
|
||||||
|
_healthCheckClient = new HttpClient
|
||||||
|
{
|
||||||
|
Timeout = TimeSpan.FromSeconds(3) // Quick health check timeout
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Quickly checks whether an endpoint is healthy: a GET to <paramref name="baseUrl"/> must
/// return a success status code before the health-check client's timeout elapses.
/// Every outcome (healthy or unhealthy) is cached for the configured cache expiry
/// to avoid excessive health checks.
/// </summary>
/// <param name="baseUrl">Base URL of the endpoint to probe; also used as the cache key.</param>
/// <returns>
/// <c>true</c> when a fresh cache entry says the endpoint is healthy or the probe returned a
/// success status code; <c>false</c> on a non-success status code or any exception
/// (exceptions are logged and never propagate to the caller).
/// </returns>
private async Task<bool> IsEndpointHealthyAsync(string baseUrl)
{
    // Check cache first; only entries younger than the expiry window are trusted.
    lock (_healthCacheLock)
    {
        if (_healthCache.TryGetValue(baseUrl, out var cached)
            && DateTime.UtcNow - cached.checkedAt < _healthCacheExpiry)
        {
            return cached.isHealthy;
        }
    }

    bool isHealthy;

    try
    {
        // Only the response headers are requested, so the body is never read. The response
        // must be disposed explicitly; otherwise the underlying connection stays tied up
        // until the garbage collector finalizes it.
        using var response = await _healthCheckClient.GetAsync(baseUrl, HttpCompletionOption.ResponseHeadersRead);
        isHealthy = response.IsSuccessStatusCode;

        if (!isHealthy)
        {
            _logger.LogDebug("{Service} endpoint {Endpoint} health check failed: {StatusCode}",
                _serviceName, baseUrl, response.StatusCode);
        }
    }
    catch (Exception ex)
    {
        // Timeouts, DNS failures, refused connections, etc. all count as unhealthy.
        _logger.LogDebug(ex, "{Service} endpoint {Endpoint} health check failed", _serviceName, baseUrl);
        isHealthy = false;
    }

    // Cache the outcome (including failures) so repeated calls do not re-probe the endpoint.
    lock (_healthCacheLock)
    {
        _healthCache[baseUrl] = (isHealthy, DateTime.UtcNow);
    }

    return isHealthy;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Probes every configured endpoint concurrently and returns the ones that report healthy.
/// When no endpoint is healthy, the complete endpoint list is returned so callers can still try them all.
/// </summary>
/// <returns>The healthy endpoints, or all configured endpoints if none passed the check.</returns>
private async Task<List<string>> GetHealthyEndpointsAsync()
{
    // Start every probe before awaiting any of them so they run in parallel.
    var probes = new List<(string Url, Task<bool> Check)>(_apiUrls.Count);
    foreach (var url in _apiUrls)
    {
        probes.Add((url, IsEndpointHealthyAsync(url)));
    }

    // Task.WhenAll yields results in the same order the tasks were supplied.
    bool[] outcomes = await Task.WhenAll(probes.Select(p => p.Check));

    var reachable = new List<string>();
    for (var i = 0; i < outcomes.Length; i++)
    {
        if (outcomes[i])
        {
            reachable.Add(probes[i].Url);
        }
    }

    if (reachable.Count > 0)
    {
        _logger.LogDebug("{Service} health check: {Healthy}/{Total} endpoints healthy",
            _serviceName, reachable.Count, _apiUrls.Count);

        return reachable;
    }

    // Nothing passed the check: hand back the full list as a last resort.
    _logger.LogWarning("{Service} health check: no healthy endpoints found, will try all", _serviceName);
    return _apiUrls;
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -54,10 +145,14 @@ public class RoundRobinFallbackHelper
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Tries the request with the next provider in round-robin, then falls back to others on failure.
|
/// Tries the request with the next provider in round-robin, then falls back to others on failure.
|
||||||
/// This distributes load evenly across all providers while maintaining reliability.
|
/// This distributes load evenly across all providers while maintaining reliability.
|
||||||
|
/// Performs quick health checks first to avoid wasting time on dead endpoints.
|
||||||
/// Throws exception if all endpoints fail.
|
/// Throws exception if all endpoints fail.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public async Task<T> TryWithFallbackAsync<T>(Func<string, Task<T>> action)
|
public async Task<T> TryWithFallbackAsync<T>(Func<string, Task<T>> action)
|
||||||
{
|
{
|
||||||
|
// Get healthy endpoints first (with caching to avoid excessive checks)
|
||||||
|
var healthyEndpoints = await GetHealthyEndpointsAsync();
|
||||||
|
|
||||||
// Start with the next URL in round-robin to distribute load
|
// Start with the next URL in round-robin to distribute load
|
||||||
var startIndex = 0;
|
var startIndex = 0;
|
||||||
lock (_urlIndexLock)
|
lock (_urlIndexLock)
|
||||||
@@ -66,16 +161,21 @@ public class RoundRobinFallbackHelper
|
|||||||
_currentUrlIndex = (_currentUrlIndex + 1) % _apiUrls.Count;
|
_currentUrlIndex = (_currentUrlIndex + 1) % _apiUrls.Count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try healthy endpoints first, then fall back to all if needed
|
||||||
|
var endpointsToTry = healthyEndpoints.Count < _apiUrls.Count
|
||||||
|
? healthyEndpoints.Concat(_apiUrls.Except(healthyEndpoints)).ToList()
|
||||||
|
: healthyEndpoints;
|
||||||
|
|
||||||
// Try all URLs starting from the round-robin selected one
|
// Try all URLs starting from the round-robin selected one
|
||||||
for (int attempt = 0; attempt < _apiUrls.Count; attempt++)
|
for (int attempt = 0; attempt < endpointsToTry.Count; attempt++)
|
||||||
{
|
{
|
||||||
var urlIndex = (startIndex + attempt) % _apiUrls.Count;
|
var urlIndex = (startIndex + attempt) % endpointsToTry.Count;
|
||||||
var baseUrl = _apiUrls[urlIndex];
|
var baseUrl = endpointsToTry[urlIndex];
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
_logger.LogDebug("Trying {Service} endpoint {Endpoint} (attempt {Attempt}/{Total})",
|
_logger.LogDebug("Trying {Service} endpoint {Endpoint} (attempt {Attempt}/{Total})",
|
||||||
_serviceName, baseUrl, attempt + 1, _apiUrls.Count);
|
_serviceName, baseUrl, attempt + 1, endpointsToTry.Count);
|
||||||
return await action(baseUrl);
|
return await action(baseUrl);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
@@ -83,9 +183,15 @@ public class RoundRobinFallbackHelper
|
|||||||
_logger.LogWarning(ex, "{Service} request failed with endpoint {Endpoint}, trying next...",
|
_logger.LogWarning(ex, "{Service} request failed with endpoint {Endpoint}, trying next...",
|
||||||
_serviceName, baseUrl);
|
_serviceName, baseUrl);
|
||||||
|
|
||||||
if (attempt == _apiUrls.Count - 1)
|
// Mark as unhealthy in cache
|
||||||
|
lock (_healthCacheLock)
|
||||||
{
|
{
|
||||||
_logger.LogError("All {Count} {Service} endpoints failed", _apiUrls.Count, _serviceName);
|
_healthCache[baseUrl] = (false, DateTime.UtcNow);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (attempt == endpointsToTry.Count - 1)
|
||||||
|
{
|
||||||
|
_logger.LogError("All {Count} {Service} endpoints failed", endpointsToTry.Count, _serviceName);
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -150,10 +256,14 @@ public class RoundRobinFallbackHelper
|
|||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Tries the request with the next provider in round-robin, then falls back to others on failure.
|
/// Tries the request with the next provider in round-robin, then falls back to others on failure.
|
||||||
|
/// Performs quick health checks first to avoid wasting time on dead endpoints.
|
||||||
/// Returns default value if all endpoints fail (does not throw).
|
/// Returns default value if all endpoints fail (does not throw).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public async Task<T> TryWithFallbackAsync<T>(Func<string, Task<T>> action, T defaultValue)
|
public async Task<T> TryWithFallbackAsync<T>(Func<string, Task<T>> action, T defaultValue)
|
||||||
{
|
{
|
||||||
|
// Get healthy endpoints first (with caching to avoid excessive checks)
|
||||||
|
var healthyEndpoints = await GetHealthyEndpointsAsync();
|
||||||
|
|
||||||
// Start with the next URL in round-robin to distribute load
|
// Start with the next URL in round-robin to distribute load
|
||||||
var startIndex = 0;
|
var startIndex = 0;
|
||||||
lock (_urlIndexLock)
|
lock (_urlIndexLock)
|
||||||
@@ -162,16 +272,21 @@ public class RoundRobinFallbackHelper
|
|||||||
_currentUrlIndex = (_currentUrlIndex + 1) % _apiUrls.Count;
|
_currentUrlIndex = (_currentUrlIndex + 1) % _apiUrls.Count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try healthy endpoints first, then fall back to all if needed
|
||||||
|
var endpointsToTry = healthyEndpoints.Count < _apiUrls.Count
|
||||||
|
? healthyEndpoints.Concat(_apiUrls.Except(healthyEndpoints)).ToList()
|
||||||
|
: healthyEndpoints;
|
||||||
|
|
||||||
// Try all URLs starting from the round-robin selected one
|
// Try all URLs starting from the round-robin selected one
|
||||||
for (int attempt = 0; attempt < _apiUrls.Count; attempt++)
|
for (int attempt = 0; attempt < endpointsToTry.Count; attempt++)
|
||||||
{
|
{
|
||||||
var urlIndex = (startIndex + attempt) % _apiUrls.Count;
|
var urlIndex = (startIndex + attempt) % endpointsToTry.Count;
|
||||||
var baseUrl = _apiUrls[urlIndex];
|
var baseUrl = endpointsToTry[urlIndex];
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
_logger.LogDebug("Trying {Service} endpoint {Endpoint} (attempt {Attempt}/{Total})",
|
_logger.LogDebug("Trying {Service} endpoint {Endpoint} (attempt {Attempt}/{Total})",
|
||||||
_serviceName, baseUrl, attempt + 1, _apiUrls.Count);
|
_serviceName, baseUrl, attempt + 1, endpointsToTry.Count);
|
||||||
return await action(baseUrl);
|
return await action(baseUrl);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
@@ -179,10 +294,16 @@ public class RoundRobinFallbackHelper
|
|||||||
_logger.LogWarning(ex, "{Service} request failed with endpoint {Endpoint}, trying next...",
|
_logger.LogWarning(ex, "{Service} request failed with endpoint {Endpoint}, trying next...",
|
||||||
_serviceName, baseUrl);
|
_serviceName, baseUrl);
|
||||||
|
|
||||||
if (attempt == _apiUrls.Count - 1)
|
// Mark as unhealthy in cache
|
||||||
|
lock (_healthCacheLock)
|
||||||
|
{
|
||||||
|
_healthCache[baseUrl] = (false, DateTime.UtcNow);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (attempt == endpointsToTry.Count - 1)
|
||||||
{
|
{
|
||||||
_logger.LogError("All {Count} {Service} endpoints failed, returning default value",
|
_logger.LogError("All {Count} {Service} endpoints failed, returning default value",
|
||||||
_apiUrls.Count, _serviceName);
|
endpointsToTry.Count, _serviceName);
|
||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user