diff --git a/backend/app/services/bandcamp.py b/backend/app/services/bandcamp.py index d150082..217fc3a 100644 --- a/backend/app/services/bandcamp.py +++ b/backend/app/services/bandcamp.py @@ -1,4 +1,5 @@ import re +from difflib import SequenceMatcher import httpx @@ -11,6 +12,46 @@ HEADERS = { } +def _normalize(s: str) -> str: + """Normalize string for comparison.""" + return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip() + + +def _similarity(a: str, b: str) -> float: + """Return similarity ratio between two strings.""" + return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio() + + +async def search_bandcamp_verified(artist: str, title: str) -> dict | None: + """Search Bandcamp and only return a result if the artist actually matches. + + Returns the best matching result or None if no good match found. + """ + # Try track search first: "artist title" + results = await search_bandcamp(f"{artist} {title}", item_type="t") + for r in results: + artist_sim = _similarity(r.get("artist", ""), artist) + title_sim = _similarity(r.get("title", ""), title) + # Require artist to be a strong match (>0.6) and title reasonable (>0.4) + if artist_sim >= 0.6 and title_sim >= 0.4: + return r + # Or if artist is very close, accept even if title differs (different track by same artist) + if artist_sim >= 0.8: + return r + + # Try artist/band search as fallback + results = await search_bandcamp(artist, item_type="b") + for r in results: + artist_sim = _similarity(r.get("title", ""), artist) # For band results, title IS the band name + if artist_sim >= 0.6: + return r + artist_sim = _similarity(r.get("artist", ""), artist) + if artist_sim >= 0.6: + return r + + return None + + async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]: """Search Bandcamp for tracks, albums, or artists. diff --git a/backend/app/services/recommender.py b/backend/app/services/recommender.py index 8fd2123..77299d4 100644 --- a/backend/app/services/recommender.py +++ b/backend/app/services/recommender.py @@ -158,7 +158,7 @@ Return ONLY the JSON array, no other text.""" except json.JSONDecodeError: return [], remaining - from app.services.bandcamp import search_bandcamp + from app.services.bandcamp import search_bandcamp_verified # Save to DB — in bandcamp mode, only keep results verified on Bandcamp recommendations = [] @@ -169,16 +169,13 @@ Return ONLY the JSON array, no other text.""" bandcamp_url = None if bandcamp_mode: try: - results = await search_bandcamp( - f"{rec.get('artist', '')} {rec.get('title', '')}", item_type="t" + match = await search_bandcamp_verified( + rec.get("artist", ""), rec.get("title", "") ) - if not results: - # Try artist-only search as fallback - results = await search_bandcamp(rec.get("artist", ""), item_type="b") - if results: - bandcamp_url = results[0].get("bandcamp_url") + if match: + bandcamp_url = match.get("bandcamp_url") else: - # Not on Bandcamp — skip this recommendation + # Not verified on Bandcamp — skip continue except Exception: continue