diff --git a/allstarr/Services/Common/FuzzyMatcher.cs b/allstarr/Services/Common/FuzzyMatcher.cs
index fb86c14..e62d139 100644
--- a/allstarr/Services/Common/FuzzyMatcher.cs
+++ b/allstarr/Services/Common/FuzzyMatcher.cs
@@ -2,12 +2,64 @@ namespace allstarr.Services.Common;
///
/// Provides fuzzy string matching for search result scoring.
+/// Intended pipeline: 1. Strip decorators → 2. Substring matching → 3. Levenshtein. Step 4 (greedy assignment across tracks) is performed by the calling service, not by this class.
///
public static class FuzzyMatcher
{
///
- /// Calculates a similarity score between two strings (0-100).
- /// Higher score means better match.
+ /// STEP 1: Strips common decorators from track titles to improve matching.
+ /// Removes: (feat. X), (with Y), (ft. Z), - From "Album", [Remix], etc.
+ /// This MUST be done first to avoid systematic noise in matching.
+ ///
+    public static string StripDecorators(string title)
+    {
+        if (string.IsNullOrWhiteSpace(title))
+        {
+            return string.Empty;
+        }
+
+        var cleaned = title;
+
+        // Remove (feat. ...), (ft. ...), (with ...), (featuring ...) and unbracketed "feat. X" tails.
+        // "with" is only stripped inside brackets, and the unbracketed form requires a word boundary,
+        // so ordinary titles such as "Left Behind" or "Die With A Smile" are left intact.
+        cleaned = System.Text.RegularExpressions.Regex.Replace(
+            cleaned,
+            @"\s*(?:[\(\[]\s*(?:feat\.?|ft\.?|with|featuring)\s+[^\)\]]+[\)\]]?|\b(?:feat\.?|ft\.?|featuring)\s+[^\)\]]+[\)\]]?)",
+            "",
+            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+
+        // Remove - From "Album Name" or - From Album Name
+        cleaned = System.Text.RegularExpressions.Regex.Replace(
+            cleaned,
+            @"\s*-\s*from\s+[""']?[^""']+[""']?",
+            "",
+            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+
+        // Remove - Remastered, - Radio Edit, etc. (everything up to the next dash)
+        cleaned = System.Text.RegularExpressions.Regex.Replace(
+            cleaned,
+            @"\s*-\s*(remaster|radio edit|single version|album version|extended|original mix)[^\-]*",
+            "",
+            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+
+        // Remove [Remix], [Remaster], [Live], [Explicit], etc. (round or square brackets)
+        cleaned = System.Text.RegularExpressions.Regex.Replace(
+            cleaned,
+            @"\s*[\[\(](remix|remaster|live|acoustic|radio edit|explicit|clean|official|audio|video|lyric)[^\]\)]*[\]\)]",
+            "",
+            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
+
+        // Trim whitespace left behind by the removals
+        return cleaned.Trim();
+    }
+
+ ///
+ /// Calculates similarity score following OPTIMAL ORDER:
+ /// 1. Strip decorators (expected from the caller, e.g. via CalculateSimilarityAggressive; not every caller does this)
+ /// 2. Substring matching (cheap, high-precision)
+ /// 3. Levenshtein distance (expensive, fuzzy)
+ /// Returns score 0-100.
///
public static int CalculateSimilarity(string query, string target)
{
@@ -16,47 +68,87 @@ public static class FuzzyMatcher
return 0;
}
- var queryLower = NormalizeForMatching(query);
- var targetLower = NormalizeForMatching(target);
+ var queryNorm = NormalizeForMatching(query);
+ var targetNorm = NormalizeForMatching(target);
+ // STEP 2: SUBSTRING MATCHING (cheap, high-precision)
+
// Exact match
- if (queryLower == targetLower)
+ if (queryNorm == targetNorm)
{
return 100;
}
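+ // One string fully contains the other (substring match).
+ // Example: "luther" ⊂ "luther remastered" → instant win.
+ // NOTE(review): a StartsWith or whole-word hit is also a Contains hit, so the 90 and 85
+ // branches below look unreachable in practice — confirm the intended check order.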
+ if (targetNorm.Contains(queryNorm) || queryNorm.Contains(targetNorm))
+ {
+ return 95;
+ }
+
// Starts with query
- if (targetLower.StartsWith(queryLower))
+ if (targetNorm.StartsWith(queryNorm) || queryNorm.StartsWith(targetNorm))
{
return 90;
}
// Contains query as whole word
- if (targetLower.Contains($" {queryLower} ") ||
- targetLower.StartsWith($"{queryLower} ") ||
- targetLower.EndsWith($" {queryLower}"))
+ if (targetNorm.Contains($" {queryNorm} ") ||
+ targetNorm.StartsWith($"{queryNorm} ") ||
+ targetNorm.EndsWith($" {queryNorm}") ||
+ queryNorm.Contains($" {targetNorm} ") ||
+ queryNorm.StartsWith($"{targetNorm} ") ||
+ queryNorm.EndsWith($" {targetNorm}"))
{
- return 80;
+ return 85;
}
- // Contains query anywhere
- if (targetLower.Contains(queryLower))
- {
- return 70;
- }
-
- // Calculate Levenshtein distance for fuzzy matching
- var distance = LevenshteinDistance(queryLower, targetLower);
- var maxLength = Math.Max(queryLower.Length, targetLower.Length);
+ // STEP 3: LEVENSHTEIN DISTANCE (expensive, fuzzy)
+ // Only reached when none of the substring checks above matched
+
+ var distance = LevenshteinDistance(queryNorm, targetNorm);
+ var maxLength = Math.Max(queryNorm.Length, targetNorm.Length);
if (maxLength == 0)
{
return 100;
}
- // Convert distance to similarity score (0-60 range for fuzzy matches)
- var similarity = (1.0 - (double)distance / maxLength) * 60;
- return (int)Math.Max(0, similarity);
+ // Normalize distance by length: score = 1 - (distance / max_length)
+ var normalizedSimilarity = 1.0 - ((double)distance / maxLength);
+
+ // Convert to 0-80 range (reserve 80-100 for substring matches)
+ var score = (int)(normalizedSimilarity * 80);
+
+ return Math.Max(0, score);
+ }
+
+    /// <summary>
+    /// Scores <paramref name="query"/> against <paramref name="target"/> twice: once with
+    /// decorators removed via <see cref="StripDecorators"/> and once on the raw strings,
+    /// and returns the higher of the two <see cref="CalculateSimilarity"/> results.
+    /// </summary>
+    /// <returns>The best similarity score; 0 when either input is null or whitespace.</returns>
+    public static int CalculateSimilarityAggressive(string query, string target)
+    {
+        if (string.IsNullOrWhiteSpace(query) || string.IsNullOrWhiteSpace(target))
+        {
+            return 0;
+        }
+
+        var queryStripped = StripDecorators(query);
+        var targetStripped = StripDecorators(target);
+        var strippedScore = CalculateSimilarity(queryStripped, targetStripped);
+
+        // The raw comparison cannot change the result when stripping was a no-op
+        // (identical inputs) or when the stripped pair already reached the top score of 100.
+        var strippingWasNoOp = queryStripped == query && targetStripped == target;
+        if (strippingWasNoOp || strippedScore >= 100)
+        {
+            return strippedScore;
+        }
+        // Decorators may be part of the real title, so also score the raw strings.
+        return Math.Max(strippedScore, CalculateSimilarity(query, target));
+    }
///
diff --git a/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs b/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs
index 7e7dacb..c2c42b7 100644
--- a/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs
+++ b/allstarr/Services/Spotify/SpotifyTrackMatchingService.cs
@@ -227,6 +227,7 @@ public class SpotifyTrackMatchingService : BackgroundService
/// New matching mode that uses ISRC when available for exact matches.
/// Preserves track position for correct playlist ordering.
/// Only matches tracks that aren't already in the Jellyfin playlist.
+ /// Uses GREEDY ASSIGNMENT to maximize total matches.
///
private async Task MatchPlaylistTracksWithIsrcAsync(
string playlistName,
@@ -320,7 +321,7 @@ public class SpotifyTrackMatchingService : BackgroundService
return;
}
- _logger.LogInformation("Matching {ToMatch}/{Total} tracks for {Playlist} (skipping {Existing} already in Jellyfin, ISRC: {IsrcEnabled})",
+ _logger.LogInformation("Matching {ToMatch}/{Total} tracks for {Playlist} (skipping {Existing} already in Jellyfin, ISRC: {IsrcEnabled}, AGGRESSIVE MODE)",
tracksToMatch.Count, spotifyTracks.Count, playlistName, existingSpotifyIds.Count, _spotifyApiSettings.PreferIsrcMatching);
// Check cache - use snapshot/timestamp to detect changes
@@ -366,6 +367,9 @@ public class SpotifyTrackMatchingService : BackgroundService
var isrcMatches = 0;
var fuzzyMatches = 0;
var noMatch = 0;
+
+ // GREEDY ASSIGNMENT: Collect all possible matches first, then assign optimally
+ var allCandidates = new List<(SpotifyPlaylistTrack SpotifyTrack, Song MatchedSong, double Score, string MatchType)>();
// Process tracks in batches for parallel searching
var orderedTracks = tracksToMatch.OrderBy(t => t.Position).ToList();
@@ -382,92 +386,114 @@ public class SpotifyTrackMatchingService : BackgroundService
{
try
{
- Song? matchedSong = null;
- var matchType = "none";
+ var candidates = new List<(Song Song, double Score, string MatchType)>();
// Try ISRC match first if available and enabled
if (_spotifyApiSettings.PreferIsrcMatching && !string.IsNullOrEmpty(spotifyTrack.Isrc))
{
- matchedSong = await TryMatchByIsrcAsync(spotifyTrack.Isrc, metadataService);
- if (matchedSong != null)
+ var isrcSong = await TryMatchByIsrcAsync(spotifyTrack.Isrc, metadataService);
+ if (isrcSong != null)
{
- matchType = "isrc";
+ candidates.Add((isrcSong, 100.0, "isrc"));
}
}
- // Fall back to fuzzy matching
- if (matchedSong == null)
+ // Always try fuzzy matching to get more candidates
+ var fuzzySongs = await TryMatchByFuzzyMultipleAsync(
+ spotifyTrack.Title,
+ spotifyTrack.Artists,
+ metadataService);
+
+ foreach (var (song, score) in fuzzySongs)
{
- matchedSong = await TryMatchByFuzzyAsync(
- spotifyTrack.Title,
- spotifyTrack.Artists,
- metadataService);
-
- if (matchedSong != null)
- {
- matchType = "fuzzy";
- }
+ candidates.Add((song, score, "fuzzy"));
}
- if (matchedSong != null)
- {
- var matched = new MatchedTrack
- {
- Position = spotifyTrack.Position,
- SpotifyId = spotifyTrack.SpotifyId,
- SpotifyTitle = spotifyTrack.Title,
- SpotifyArtist = spotifyTrack.PrimaryArtist,
- Isrc = spotifyTrack.Isrc,
- MatchType = matchType,
- MatchedSong = matchedSong
- };
-
- _logger.LogDebug(" #{Position} {Title} - {Artist} → {MatchType} match: {MatchedTitle}",
- spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist,
- matchType, matchedSong.Title);
-
- return ((MatchedTrack?)matched, matchType);
- }
- else
- {
- _logger.LogDebug(" #{Position} {Title} - {Artist} → no match",
- spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist);
- return ((MatchedTrack?)null, "none");
- }
+ return (spotifyTrack, candidates);
}
catch (Exception ex)
{
_logger.LogDebug(ex, "Failed to match track: {Title} - {Artist}",
spotifyTrack.Title, spotifyTrack.PrimaryArtist);
- return ((MatchedTrack?)null, "none");
+ return (spotifyTrack, new List<(Song, double, string)>());
}
}).ToList();
// Wait for all tracks in this batch to complete
var batchResults = await Task.WhenAll(batchTasks);
- // Collect results
- foreach (var result in batchResults)
+ // Collect all candidates
+ foreach (var (spotifyTrack, candidates) in batchResults)
{
- var (matched, matchType) = result;
- if (matched != null)
+ foreach (var (song, score, matchType) in candidates)
{
- matchedTracks.Add(matched);
- if (matchType == "isrc") isrcMatches++;
- else if (matchType == "fuzzy") fuzzyMatches++;
- }
- else
- {
- noMatch++;
+ allCandidates.Add((spotifyTrack, song, score, matchType));
}
}
- // Rate limiting between batches (not between individual tracks)
+ // Rate limiting between batches
if (i + BatchSize < orderedTracks.Count)
{
await Task.Delay(DelayBetweenSearchesMs, cancellationToken);
}
}
+
+ // GREEDY ASSIGNMENT: Assign each Spotify track to its best unique match
+ var usedSongIds = new HashSet();
+ var assignments = new Dictionary();
+
+ // Sort candidates by score (highest first)
+ var sortedCandidates = allCandidates
+ .OrderByDescending(c => c.Score)
+ .ToList();
+
+ foreach (var (spotifyTrack, song, score, matchType) in sortedCandidates)
+ {
+ // Skip if this Spotify track already has a match
+ if (assignments.ContainsKey(spotifyTrack.SpotifyId))
+ continue;
+
+ // Skip if this song is already used
+ if (usedSongIds.Contains(song.Id))
+ continue;
+
+ // Assign this match
+ assignments[spotifyTrack.SpotifyId] = (song, score, matchType);
+ usedSongIds.Add(song.Id);
+ }
+
+ // Build final matched tracks list
+ foreach (var spotifyTrack in orderedTracks)
+ {
+ if (assignments.TryGetValue(spotifyTrack.SpotifyId, out var match))
+ {
+ var matched = new MatchedTrack
+ {
+ Position = spotifyTrack.Position,
+ SpotifyId = spotifyTrack.SpotifyId,
+ SpotifyTitle = spotifyTrack.Title,
+ SpotifyArtist = spotifyTrack.PrimaryArtist,
+ Isrc = spotifyTrack.Isrc,
+ MatchType = match.MatchType,
+ MatchedSong = match.Song
+ };
+
+ matchedTracks.Add(matched);
+
+ if (match.MatchType == "isrc") isrcMatches++;
+ else if (match.MatchType == "fuzzy") fuzzyMatches++;
+
+ _logger.LogDebug(" #{Position} {Title} - {Artist} → {MatchType} match (score: {Score:F1}): {MatchedTitle}",
+ spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist,
+ match.MatchType, match.Score, match.Song.Title);
+ }
+ else
+ {
+ noMatch++;
+ _logger.LogDebug(" #{Position} {Title} - {Artist} → no match",
+ spotifyTrack.Position, spotifyTrack.Title, spotifyTrack.PrimaryArtist);
+ }
+ }
if (matchedTracks.Count > 0)
{
@@ -483,7 +509,7 @@ public class SpotifyTrackMatchingService : BackgroundService
await _cache.SetAsync(legacyKey, legacySongs, TimeSpan.FromHours(1));
_logger.LogInformation(
- "✓ Cached {Matched}/{Total} tracks for {Playlist} via search (ISRC: {Isrc}, Fuzzy: {Fuzzy}, No match: {NoMatch}) - manual mappings will be applied next",
+ "✓ Cached {Matched}/{Total} tracks for {Playlist} via GREEDY ASSIGNMENT (ISRC: {Isrc}, Fuzzy: {Fuzzy}, No match: {NoMatch}) - manual mappings will be applied next",
matchedTracks.Count, tracksToMatch.Count, playlistName, isrcMatches, fuzzyMatches, noMatch);
// Pre-build playlist items cache for instant serving
@@ -495,6 +521,64 @@ public class SpotifyTrackMatchingService : BackgroundService
}
}
+    /// <summary>
+    /// Searches the metadata provider for a Spotify track and returns every result that clears
+    /// the acceptance thresholds, paired with its weighted score (70% title, 30% artist) and
+    /// ordered best-first. This method only collects candidates; the caller decides which
+    /// candidate is actually assigned to the track.
+    /// </summary>
+    /// <param name="title">Spotify track title; decorators are stripped only to build the search query.</param>
+    /// <param name="artists">Spotify artist names; the first entry is used in the search query.</param>
+    /// <param name="metadataService">Provider used to run the search.</param>
+    /// <returns>Accepted candidates with their scores; empty when nothing qualifies or the search fails.</returns>
+    private async Task<List<(Song, double)>> TryMatchByFuzzyMultipleAsync(
+        string title,
+        List<string> artists,
+        IMusicMetadataService metadataService)
+    {
+        // Weighting of the combined score: title matching is prioritised over artist matching.
+        const double TitleWeight = 0.7;
+        const double ArtistWeight = 0.3;
+
+        try
+        {
+            var primaryArtist = artists.FirstOrDefault() ?? "";
+
+            // Search with the decorator-free title, but fall back to the raw title when
+            // stripping leaves nothing, so the query is never the artist name alone.
+            var titleStripped = FuzzyMatcher.StripDecorators(title);
+            var searchTitle = string.IsNullOrWhiteSpace(titleStripped) ? title : titleStripped;
+            var query = $"{searchTitle} {primaryArtist}";
+
+            var results = await metadataService.SearchSongsAsync(query, limit: 10);
+
+            if (results.Count == 0) return new List<(Song, double)>();
+
+            return results
+                .Select(song =>
+                {
+                    // Title scoring uses the raw title; CalculateSimilarityAggressive strips internally.
+                    var titleScore = FuzzyMatcher.CalculateSimilarityAggressive(title, song.Title);
+                    var artistScore = CalculateArtistMatchScore(artists, song.Artist, song.Contributors);
+                    var totalScore = (titleScore * TitleWeight) + (artistScore * ArtistWeight);
+                    return (Song: song, TitleScore: titleScore, ArtistScore: artistScore, TotalScore: totalScore);
+                })
+                // Accept on overall score, on a strong artist match with a passable title,
+                // or on a near-exact title alone.
+                .Where(x =>
+                    x.TotalScore >= 40 ||
+                    (x.ArtistScore >= 70 && x.TitleScore >= 30) ||
+                    x.TitleScore >= 85)
+                .OrderByDescending(x => x.TotalScore)
+                .Select(x => (x.Song, x.TotalScore))
+                .ToList();
+        }
+        catch (Exception ex)
+        {
+            // Best-effort: a failed search yields no candidates, but leave a trace for diagnosis.
+            _logger.LogDebug(ex, "Fuzzy candidate search failed for {Title}", title);
+            return new List<(Song, double)>();
+        }
+    }
+
///
/// Attempts to match a track by ISRC using provider search.
///
@@ -524,7 +608,12 @@ public class SpotifyTrackMatchingService : BackgroundService
}
///
- /// Attempts to match a track by title and artist using fuzzy matching.
+ /// Attempts to match a track by title and artist using AGGRESSIVE fuzzy matching.
+ /// FOLLOWS OPTIMAL ORDER:
+ /// 1. Strip decorators FIRST (before searching)
+ /// 2. Substring matching (in FuzzyMatcher)
+ /// 3. Levenshtein distance (in FuzzyMatcher)
+ /// PRIORITY: Match as many tracks as possible, even with lower confidence.
///
private async Task TryMatchByFuzzyAsync(
string title,
@@ -534,17 +623,22 @@ public class SpotifyTrackMatchingService : BackgroundService
try
{
var primaryArtist = artists.FirstOrDefault() ?? "";
- var query = $"{title} {primaryArtist}";
- var results = await metadataService.SearchSongsAsync(query, limit: 5);
+
+ // STEP 1: Strip decorators FIRST (before searching)
+ var titleStripped = FuzzyMatcher.StripDecorators(title);
+ var query = $"{titleStripped} {primaryArtist}";
+
+ var results = await metadataService.SearchSongsAsync(query, limit: 10);
if (results.Count == 0) return null;
- // Score all results
+ // STEP 2-3: Score all results (substring + Levenshtein in CalculateSimilarityAggressive)
var scoredResults = results
.Select(song => new
{
Song = song,
- TitleScore = FuzzyMatcher.CalculateSimilarity(title, song.Title),
+ // Use aggressive matching which follows optimal order internally
+ TitleScore = FuzzyMatcher.CalculateSimilarityAggressive(title, song.Title),
ArtistScore = CalculateArtistMatchScore(artists, song.Artist, song.Contributors)
})
.Select(x => new
@@ -552,27 +646,39 @@ public class SpotifyTrackMatchingService : BackgroundService
x.Song,
x.TitleScore,
x.ArtistScore,
- TotalScore = (x.TitleScore * 0.6) + (x.ArtistScore * 0.4)
+ // Weight: 70% title, 30% artist (prioritize title matching)
+ TotalScore = (x.TitleScore * 0.7) + (x.ArtistScore * 0.3)
})
.OrderByDescending(x => x.TotalScore)
.ToList();
var bestMatch = scoredResults.FirstOrDefault();
- // If we have a good match (50+), use it
- if (bestMatch != null && bestMatch.TotalScore >= 50)
+ if (bestMatch == null) return null;
+
+ // AGGRESSIVE: Accept matches with score >= 40 (was 50)
+ if (bestMatch.TotalScore >= 40)
{
+ _logger.LogDebug("✓ Matched (score: {Score:F1}, title: {TitleScore}, artist: {ArtistScore}): {SpotifyTitle} → {MatchedTitle}",
+ bestMatch.TotalScore, bestMatch.TitleScore, bestMatch.ArtistScore, title, bestMatch.Song.Title);
return bestMatch.Song;
}
- // Fallback: If the provider returned results and the top result has decent artist match,
- // trust the provider's search algorithm (it already did fuzzy matching)
- // This helps with tracks that have features/remixes in parentheses/brackets
- // where the provider might format them differently
- if (bestMatch != null && bestMatch.ArtistScore >= 70)
+ // SUPER AGGRESSIVE: If artist matches well (70+), accept even lower title scores
+ // This handles cases like "a" → "a-blah" where artist is the same
+ if (bestMatch.ArtistScore >= 70 && bestMatch.TitleScore >= 30)
{
- _logger.LogDebug("Using provider's top result despite low title score (Artist: {ArtistScore}, Title: {TitleScore}): {Title}",
- bestMatch.ArtistScore, bestMatch.TitleScore, bestMatch.Song.Title);
+ _logger.LogDebug("✓ Matched via artist priority (artist: {ArtistScore}, title: {TitleScore}): {SpotifyTitle} → {MatchedTitle}",
+ bestMatch.ArtistScore, bestMatch.TitleScore, title, bestMatch.Song.Title);
+ return bestMatch.Song;
+ }
+
+ // ULTRA AGGRESSIVE: If title has high substring match (85+), accept it
+ // This handles "luther" → "luther (feat. sza)"
+ if (bestMatch.TitleScore >= 85)
+ {
+ _logger.LogDebug("✓ Matched via substring (title: {TitleScore}): {SpotifyTitle} → {MatchedTitle}",
+ bestMatch.TitleScore, title, bestMatch.Song.Title);
return bestMatch.Song;
}
@@ -993,7 +1099,7 @@ public class SpotifyTrackMatchingService : BackgroundService
}
}
- // If no manual external mapping, try fuzzy matching with local Jellyfin tracks
+ // If no manual external mapping, try AGGRESSIVE fuzzy matching with local Jellyfin tracks
double bestScore = 0;
foreach (var kvp in jellyfinItemsByName)
@@ -1008,11 +1114,18 @@ public class SpotifyTrackMatchingService : BackgroundService
artist = artistsEl[0].GetString() ?? "";
}
- var titleScore = FuzzyMatcher.CalculateSimilarity(spotifyTrack.Title, title);
+ // Use AGGRESSIVE matching with decorator stripping
+ var titleScore = FuzzyMatcher.CalculateSimilarityAggressive(spotifyTrack.Title, title);
var artistScore = FuzzyMatcher.CalculateSimilarity(spotifyTrack.PrimaryArtist, artist);
+
+ // Weight: 70% title, 30% artist (prioritize title matching)
var totalScore = (titleScore * 0.7) + (artistScore * 0.3);
- if (totalScore > bestScore && totalScore >= 70)
+ // AGGRESSIVE: Accept score >= 40 (was 70)
+ // Also accept if artist matches well (70+) and title is decent (30+)
+ var isGoodMatch = totalScore >= 40 || (artistScore >= 70 && titleScore >= 30);
+
+ if (totalScore > bestScore && isGoodMatch)
{
bestScore = totalScore;
matchedJellyfinItem = item;