diff --git a/allstarr/Services/Common/FuzzyMatcher.cs b/allstarr/Services/Common/FuzzyMatcher.cs index fb86c14..e62d139 100644 --- a/allstarr/Services/Common/FuzzyMatcher.cs +++ b/allstarr/Services/Common/FuzzyMatcher.cs @@ -2,12 +2,64 @@ namespace allstarr.Services.Common; /// /// Provides fuzzy string matching for search result scoring. +/// OPTIMAL ORDER: 1. Strip decorators → 2. Substring matching → 3. Levenshtein → 4. Greedy assignment /// public static class FuzzyMatcher { /// - /// Calculates a similarity score between two strings (0-100). - /// Higher score means better match. + /// STEP 1: Strips common decorators from track titles to improve matching. + /// Removes: (feat. X), (with Y), (ft. Z), - From "Album", [Remix], etc. + /// This MUST be done first to avoid systematic noise in matching. + /// + public static string StripDecorators(string title) + { + if (string.IsNullOrWhiteSpace(title)) + { + return string.Empty; + } + + var cleaned = title; + + // Remove (feat. ...), (ft. ...), (with ...), (featuring ...) + cleaned = System.Text.RegularExpressions.Regex.Replace( + cleaned, + @"\s*[\(\[]?\s*(feat\.?|ft\.?|with|featuring)\s+[^\)\]]+[\)\]]?", + "", + System.Text.RegularExpressions.RegexOptions.IgnoreCase); + + // Remove - From "Album Name" or - From Album Name + cleaned = System.Text.RegularExpressions.Regex.Replace( + cleaned, + @"\s*-\s*from\s+[""']?[^""']+[""']?", + "", + System.Text.RegularExpressions.RegexOptions.IgnoreCase); + + // Remove - Remastered, - Radio Edit, etc. + cleaned = System.Text.RegularExpressions.Regex.Replace( + cleaned, + @"\s*-\s*(remaster|radio edit|single version|album version|extended|original mix)[^\-]*", + "", + System.Text.RegularExpressions.RegexOptions.IgnoreCase); + + // Remove [Remix], [Remaster], [Live], [Explicit], etc. + cleaned = System.Text.RegularExpressions.Regex.Replace( + cleaned, + @"\s*[\[\(](remix|remaster|live|acoustic|radio edit|explicit|clean|official|audio|video|lyric)[^\]\)]*[\]\)]", + "", + System.Text.RegularExpressions.RegexOptions.IgnoreCase); + + // Remove trailing/leading whitespace and normalize + cleaned = cleaned.Trim(); + + return cleaned; + } + + /// + /// Calculates similarity score following OPTIMAL ORDER: + /// 1. Strip decorators (already done by caller) + /// 2. Substring matching (cheap, high-precision) + /// 3. Levenshtein distance (expensive, fuzzy) + /// Returns score 0-100. /// public static int CalculateSimilarity(string query, string target) { @@ -16,47 +68,87 @@ public static class FuzzyMatcher return 0; } - var queryLower = NormalizeForMatching(query); - var targetLower = NormalizeForMatching(target); + var queryNorm = NormalizeForMatching(query); + var targetNorm = NormalizeForMatching(target); + // STEP 2: SUBSTRING MATCHING (cheap, high-precision) + // Exact match - if (queryLower == targetLower) + if (queryNorm == targetNorm) { return 100; } + // One string fully contains the other (substring match) + // Example: "luther" ⊂ "luther remastered" → instant win + if (targetNorm.Contains(queryNorm) || queryNorm.Contains(targetNorm)) + { + return 95; + } + // Starts with query - if (targetLower.StartsWith(queryLower)) + if (targetNorm.StartsWith(queryNorm) || queryNorm.StartsWith(targetNorm)) { return 90; } // Contains query as whole word - if (targetLower.Contains($" {queryLower} ") || - targetLower.StartsWith($"{queryLower} ") || - targetLower.EndsWith($" {queryLower}")) + if (targetNorm.Contains($" {queryNorm} ") || + targetNorm.StartsWith($"{queryNorm} ") || + targetNorm.EndsWith($" {queryNorm}") || + queryNorm.Contains($" {targetNorm} ") || + queryNorm.StartsWith($"{targetNorm} ") || + queryNorm.EndsWith($" {targetNorm}")) { - return 80; + return 85; } - // Contains query anywhere - if (targetLower.Contains(queryLower)) - { - return 70; - } - - // Calculate Levenshtein distance for fuzzy matching - var distance = LevenshteinDistance(queryLower, targetLower); - var maxLength = Math.Max(queryLower.Length, targetLower.Length); + // STEP 3: LEVENSHTEIN DISTANCE (expensive, fuzzy) + // Only use this for candidates that survived substring checks + + var distance = LevenshteinDistance(queryNorm, targetNorm); + var maxLength = Math.Max(queryNorm.Length, targetNorm.Length); if (maxLength == 0) { return 100; } - // Convert distance to similarity score (0-60 range for fuzzy matches) - var similarity = (1.0 - (double)distance / maxLength) * 60; - return (int)Math.Max(0, similarity); + // Normalize distance by length: score = 1 - (distance / max_length) + var normalizedSimilarity = 1.0 - ((double)distance / maxLength); + + // Convert to 0-80 range (reserve 80-100 for substring matches) + var score = (int)(normalizedSimilarity * 80); + + return Math.Max(0, score); + } + + /// + /// AGGRESSIVE matching that follows optimal order: + /// 1. Strip decorators FIRST + /// 2. Substring matching + /// 3. Levenshtein distance + /// Returns the best score. + /// + public static int CalculateSimilarityAggressive(string query, string target) + { + if (string.IsNullOrWhiteSpace(query) || string.IsNullOrWhiteSpace(target)) + { + return 0; + } + + // STEP 1: Strip decorators FIRST (always) + var queryStripped = StripDecorators(query); + var targetStripped = StripDecorators(target); + + // STEP 2-3: Substring matching + Levenshtein + var strippedScore = CalculateSimilarity(queryStripped, targetStripped); + + // Also try without stripping in case decorators are part of the actual title + var rawScore = CalculateSimilarity(query, target); + + // Return the best score + return Math.Max(rawScore, strippedScore); } /// diff --git a/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs b/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs index 7e7dacb..c2c42b7 100644 --- a/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs +++ b/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs @@ -227,6 +227,7 @@ public class SpotifyTrackMatchingService : BackgroundService /// New matching mode that uses ISRC when available for exact matches. /// Preserves track position for correct playlist ordering. /// Only matches tracks that aren't already in the Jellyfin playlist. + /// Uses GREEDY ASSIGNMENT to maximize total matches. /// private async Task MatchPlaylistTracksWithIsrcAsync( string playlistName, @@ -320,7 +321,7 @@ public class SpotifyTrackMatchingService : BackgroundService return; } - _logger.LogInformation("Matching {ToMatch}/{Total} tracks for {Playlist} (skipping {Existing} already in Jellyfin, ISRC: {IsrcEnabled})", + _logger.LogInformation("Matching {ToMatch}/{Total} tracks for {Playlist} (skipping {Existing} already in Jellyfin, ISRC: {IsrcEnabled}, AGGRESSIVE MODE)", tracksToMatch.Count, spotifyTracks.Count, playlistName, existingSpotifyIds.Count, _spotifyApiSettings.PreferIsrcMatching); // Check cache - use snapshot/timestamp to detect changes @@ -366,6 +367,9 @@ public class SpotifyTrackMatchingService : BackgroundService var isrcMatches = 0; var fuzzyMatches = 0; var noMatch = 0; + + // GREEDY ASSIGNMENT: Collect all possible matches first, then assign optimally + var allCandidates = new List<(SpotifyPlaylistTrack SpotifyTrack, Song MatchedSong, double Score, string MatchType)>(); // Process tracks in batches for parallel searching var orderedTracks = tracksToMatch.OrderBy(t => t.Position).ToList(); @@ -382,92 +386,114 @@ public class SpotifyTrackMatchingService : BackgroundService { try { - Song? matchedSong = null; - var matchType = "none"; + var candidates = new List<(Song Song, double Score, string MatchType)>(); // Try ISRC match first if available and enabled if (_spotifyApiSettings.PreferIsrcMatching && !string.IsNullOrEmpty(spotifyTrack.Isrc)) { - matchedSong = await TryMatchByIsrcAsync(spotifyTrack.Isrc, metadataService); - if (matchedSong != null) + var isrcSong = await TryMatchByIsrcAsync(spotifyTrack.Isrc, metadataService); + if (isrcSong != null) { - matchType = "isrc"; + candidates.Add((isrcSong, 100.0, "isrc")); } } - // Fall back to fuzzy matching - if (matchedSong == null) + // Always try fuzzy matching to get more candidates + var fuzzySongs = await TryMatchByFuzzyMultipleAsync( + spotifyTrack.Title, + spotifyTrack.Artists, + metadataService); + + foreach (var (song, score) in fuzzySongs) { - matchedSong = await TryMatchByFuzzyAsync( - spotifyTrack.Title, - spotifyTrack.Artists, - metadataService); - - if (matchedSong != null) - { - matchType = "fuzzy"; - } + candidates.Add((song, score, "fuzzy")); } - if (matchedSong != null) - { - var matched = new MatchedTrack - { - Position = spotifyTrack.Position, - SpotifyId = spotifyTrack.SpotifyId, - SpotifyTitle = spotifyTrack.Title, - SpotifyArtist = spotifyTrack.PrimaryArtist, - Isrc = spotifyTrack.Isrc, - MatchType = matchType, - MatchedSong = matchedSong - }; - - _logger.LogDebug(" #{Position} {Title} - {Artist} → {MatchType} match: {MatchedTitle}", - spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist, - matchType, matchedSong.Title); - - return ((MatchedTrack?)matched, matchType); - } - else - { - _logger.LogDebug(" #{Position} {Title} - {Artist} → no match", - spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist); - return ((MatchedTrack?)null, "none"); - } + return (spotifyTrack, candidates); } catch (Exception ex) { _logger.LogDebug(ex, "Failed to match track: {Title} - {Artist}", spotifyTrack.Title, spotifyTrack.PrimaryArtist); - return ((MatchedTrack?)null, "none"); + return (spotifyTrack, new List<(Song, double, string)>()); } }).ToList(); // Wait for all tracks in this batch to complete var batchResults = await Task.WhenAll(batchTasks); - // Collect results - foreach (var result in batchResults) + // Collect all candidates + foreach (var (spotifyTrack, candidates) in batchResults) { - var (matched, matchType) = result; - if (matched != null) + foreach (var (song, score, matchType) in candidates) { - matchedTracks.Add(matched); - if (matchType == "isrc") isrcMatches++; - else if (matchType == "fuzzy") fuzzyMatches++; - } - else - { - noMatch++; + allCandidates.Add((spotifyTrack, song, score, matchType)); } } - // Rate limiting between batches (not between individual tracks) + // Rate limiting between batches if (i + BatchSize < orderedTracks.Count) { await Task.Delay(DelayBetweenSearchesMs, cancellationToken); } } + + // GREEDY ASSIGNMENT: Assign each Spotify track to its best unique match + var usedSongIds = new HashSet(); + var assignments = new Dictionary(); + + // Sort candidates by score (highest first) + var sortedCandidates = allCandidates + .OrderByDescending(c => c.Score) + .ToList(); + + foreach (var (spotifyTrack, song, score, matchType) in sortedCandidates) + { + // Skip if this Spotify track already has a match + if (assignments.ContainsKey(spotifyTrack.SpotifyId)) + continue; + + // Skip if this song is already used + if (usedSongIds.Contains(song.Id)) + continue; + + // Assign this match + assignments[spotifyTrack.SpotifyId] = (song, score, matchType); + usedSongIds.Add(song.Id); + } + + // Build final matched tracks list + foreach (var spotifyTrack in orderedTracks) + { + if (assignments.TryGetValue(spotifyTrack.SpotifyId, out var match)) + { + var matched = new MatchedTrack + { + Position = spotifyTrack.Position, + SpotifyId = spotifyTrack.SpotifyId, + SpotifyTitle = spotifyTrack.Title, + SpotifyArtist = spotifyTrack.PrimaryArtist, + Isrc = spotifyTrack.Isrc, + MatchType = match.MatchType, + MatchedSong = match.Song + }; + + matchedTracks.Add(matched); + + if (match.MatchType == "isrc") isrcMatches++; + else if (match.MatchType == "fuzzy") fuzzyMatches++; + + _logger.LogDebug(" #{Position} {Title} - {Artist} → {MatchType} match (score: {Score:F1}): {MatchedTitle}", + spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist, + match.MatchType, match.Score, match.Song.Title); + } + else + { + noMatch++; + _logger.LogDebug(" #{Position} {Title} - {Artist} → no match", + spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist); + } + } if (matchedTracks.Count > 0) { @@ -483,7 +509,7 @@ public class SpotifyTrackMatchingService : BackgroundService await _cache.SetAsync(legacyKey, legacySongs, TimeSpan.FromHours(1)); _logger.LogInformation( - "✓ Cached {Matched}/{Total} tracks for {Playlist} via search (ISRC: {Isrc}, Fuzzy: {Fuzzy}, No match: {NoMatch}) - manual mappings will be applied next", + "✓ Cached {Matched}/{Total} tracks for {Playlist} via GREEDY ASSIGNMENT (ISRC: {Isrc}, Fuzzy: {Fuzzy}, No match: {NoMatch}) - manual mappings will be applied next", matchedTracks.Count, tracksToMatch.Count, playlistName, isrcMatches, fuzzyMatches, noMatch); // Pre-build playlist items cache for instant serving @@ -495,6 +521,64 @@ public class SpotifyTrackMatchingService : BackgroundService } } + /// + /// Returns multiple candidate matches with scores for greedy assignment. + /// FOLLOWS OPTIMAL ORDER: + /// 1. Strip decorators (done in FuzzyMatcher) + /// 2. Substring matching (done in FuzzyMatcher) + /// 3. Levenshtein distance (done in FuzzyMatcher) + /// This method just collects candidates; greedy assignment happens later. + /// + private async Task> TryMatchByFuzzyMultipleAsync( + string title, + List artists, + IMusicMetadataService metadataService) + { + try + { + var primaryArtist = artists.FirstOrDefault() ?? ""; + + // STEP 1: Strip decorators FIRST (before searching) + var titleStripped = FuzzyMatcher.StripDecorators(title); + var query = $"{titleStripped} {primaryArtist}"; + + var results = await metadataService.SearchSongsAsync(query, limit: 10); + + if (results.Count == 0) return new List<(Song, double)>(); + + // STEP 2-3: Score all results (substring + Levenshtein already in CalculateSimilarityAggressive) + var scoredResults = results + .Select(song => new + { + Song = song, + // Use aggressive matching which follows optimal order internally + TitleScore = FuzzyMatcher.CalculateSimilarityAggressive(title, song.Title), + ArtistScore = CalculateArtistMatchScore(artists, song.Artist, song.Contributors) + }) + .Select(x => new + { + x.Song, + x.TitleScore, + x.ArtistScore, + // Weight: 70% title, 30% artist (prioritize title matching) + TotalScore = (x.TitleScore * 0.7) + (x.ArtistScore * 0.3) + }) + .Where(x => + x.TotalScore >= 40 || + (x.ArtistScore >= 70 && x.TitleScore >= 30) || + x.TitleScore >= 85) + .OrderByDescending(x => x.TotalScore) + .Select(x => (x.Song, x.TotalScore)) + .ToList(); + + return scoredResults; + } + catch + { + return new List<(Song, double)>(); + } + } + /// /// Attempts to match a track by ISRC using provider search. /// @@ -524,7 +608,12 @@ public class SpotifyTrackMatchingService : BackgroundService } /// - /// Attempts to match a track by title and artist using fuzzy matching. + /// Attempts to match a track by title and artist using AGGRESSIVE fuzzy matching. + /// FOLLOWS OPTIMAL ORDER: + /// 1. Strip decorators FIRST (before searching) + /// 2. Substring matching (in FuzzyMatcher) + /// 3. Levenshtein distance (in FuzzyMatcher) + /// PRIORITY: Match as many tracks as possible, even with lower confidence. /// private async Task TryMatchByFuzzyAsync( string title, @@ -534,17 +623,22 @@ public class SpotifyTrackMatchingService : BackgroundService try { var primaryArtist = artists.FirstOrDefault() ?? ""; - var query = $"{title} {primaryArtist}"; - var results = await metadataService.SearchSongsAsync(query, limit: 5); + + // STEP 1: Strip decorators FIRST (before searching) + var titleStripped = FuzzyMatcher.StripDecorators(title); + var query = $"{titleStripped} {primaryArtist}"; + + var results = await metadataService.SearchSongsAsync(query, limit: 10); if (results.Count == 0) return null; - // Score all results + // STEP 2-3: Score all results (substring + Levenshtein in CalculateSimilarityAggressive) var scoredResults = results .Select(song => new { Song = song, - TitleScore = FuzzyMatcher.CalculateSimilarity(title, song.Title), + // Use aggressive matching which follows optimal order internally + TitleScore = FuzzyMatcher.CalculateSimilarityAggressive(title, song.Title), ArtistScore = CalculateArtistMatchScore(artists, song.Artist, song.Contributors) }) .Select(x => new @@ -552,27 +646,39 @@ public class SpotifyTrackMatchingService : BackgroundService x.Song, x.TitleScore, x.ArtistScore, - TotalScore = (x.TitleScore * 0.6) + (x.ArtistScore * 0.4) + // Weight: 70% title, 30% artist (prioritize title matching) + TotalScore = (x.TitleScore * 0.7) + (x.ArtistScore * 0.3) }) .OrderByDescending(x => x.TotalScore) .ToList(); var bestMatch = scoredResults.FirstOrDefault(); - // If we have a good match (50+), use it - if (bestMatch != null && bestMatch.TotalScore >= 50) + if (bestMatch == null) return null; + + // AGGRESSIVE: Accept matches with score >= 40 (was 50) + if (bestMatch.TotalScore >= 40) { + _logger.LogDebug("✓ Matched (score: {Score:F1}, title: {TitleScore}, artist: {ArtistScore}): {SpotifyTitle} → {MatchedTitle}", + bestMatch.TotalScore, bestMatch.TitleScore, bestMatch.ArtistScore, title, bestMatch.Song.Title); return bestMatch.Song; } - // Fallback: If the provider returned results and the top result has decent artist match, - // trust the provider's search algorithm (it already did fuzzy matching) - // This helps with tracks that have features/remixes in parentheses/brackets - // where the provider might format them differently - if (bestMatch != null && bestMatch.ArtistScore >= 70) + // SUPER AGGRESSIVE: If artist matches well (70+), accept even lower title scores + // This handles cases like "a" → "a-blah" where artist is the same + if (bestMatch.ArtistScore >= 70 && bestMatch.TitleScore >= 30) { - _logger.LogDebug("Using provider's top result despite low title score (Artist: {ArtistScore}, Title: {TitleScore}): {Title}", - bestMatch.ArtistScore, bestMatch.TitleScore, bestMatch.Song.Title); + _logger.LogDebug("✓ Matched via artist priority (artist: {ArtistScore}, title: {TitleScore}): {SpotifyTitle} → {MatchedTitle}", + bestMatch.ArtistScore, bestMatch.TitleScore, title, bestMatch.Song.Title); + return bestMatch.Song; + } + + // ULTRA AGGRESSIVE: If title has high substring match (85+), accept it + // This handles "luther" → "luther (feat. sza)" + if (bestMatch.TitleScore >= 85) + { + _logger.LogDebug("✓ Matched via substring (title: {TitleScore}): {SpotifyTitle} → {MatchedTitle}", + bestMatch.TitleScore, title, bestMatch.Song.Title); return bestMatch.Song; } @@ -993,7 +1099,7 @@ public class SpotifyTrackMatchingService : BackgroundService } } - // If no manual external mapping, try fuzzy matching with local Jellyfin tracks + // If no manual external mapping, try AGGRESSIVE fuzzy matching with local Jellyfin tracks double bestScore = 0; foreach (var kvp in jellyfinItemsByName) @@ -1008,11 +1114,18 @@ public class SpotifyTrackMatchingService : BackgroundService artist = artistsEl[0].GetString() ?? ""; } - var titleScore = FuzzyMatcher.CalculateSimilarity(spotifyTrack.Title, title); + // Use AGGRESSIVE matching with decorator stripping + var titleScore = FuzzyMatcher.CalculateSimilarityAggressive(spotifyTrack.Title, title); var artistScore = FuzzyMatcher.CalculateSimilarity(spotifyTrack.PrimaryArtist, artist); + + // Weight: 70% title, 30% artist (prioritize title matching) var totalScore = (titleScore * 0.7) + (artistScore * 0.3); - if (totalScore > bestScore && totalScore >= 70) + // AGGRESSIVE: Accept score >= 40 (was 70) + // Also accept if artist matches well (70+) and title is decent (30+) + var isGoodMatch = totalScore >= 40 || (artistScore >= 70 && titleScore >= 30); + + if (totalScore > bestScore && isGoodMatch) { bestScore = totalScore; matchedJellyfinItem = item;