Verify Bandcamp results match the actual artist/track before including

This commit is contained in:
root
2026-03-30 23:56:17 -05:00
parent c6a82cf9d9
commit 1efa5cd628
2 changed files with 47 additions and 9 deletions

View File

@@ -1,4 +1,5 @@
import re import re
from difflib import SequenceMatcher
import httpx import httpx
@@ -11,6 +12,46 @@ HEADERS = {
} }
def _normalize(s: str) -> str:
"""Normalize string for comparison."""
return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip()
def _similarity(a: str, b: str) -> float:
    """Return the similarity ratio between two strings after normalization.

    Args:
        a: First string to compare.
        b: Second string to compare.

    Returns:
        A ratio in the range 0.0-1.0, where 1.0 means the normalized strings
        are identical. Returns 0.0 when either string normalizes to empty,
        because there is nothing to compare.
    """
    norm_a = _normalize(a)
    norm_b = _normalize(b)

    # SequenceMatcher.ratio() reports 1.0 for two empty sequences, which
    # would make a missing/blank name look like a perfect match.
    if not norm_a or not norm_b:
        return 0.0

    return SequenceMatcher(None, norm_a, norm_b).ratio()
async def search_bandcamp_verified(artist: str, title: str) -> dict | None:
    """Search Bandcamp and only return a result whose artist actually matches.

    Track results for "<artist> <title>" are checked first. A track qualifies
    when its artist is a strong match and its title is a reasonable match, or
    when its artist alone is a very close match (a different track by the
    same artist). Among qualifying tracks, those matching on both artist and
    title are preferred, then the highest combined similarity; ties keep the
    earliest search result.

    If no track qualifies, an artist/band search is used as a fallback and
    the closest band at or above the artist threshold is returned.

    Args:
        artist: Artist name to verify results against.
        title: Track title to verify results against.

    Returns:
        The best matching result dict, or None if no good match was found.
    """
    # Similarity thresholds (ratios as returned by _similarity).
    artist_min = 0.6      # artist must be at least a strong match
    title_min = 0.4       # title only needs to be reasonably close
    artist_strong = 0.8   # artist close enough to accept any title

    # Without an artist there is nothing to verify a result against.
    if not artist or not artist.strip():
        return None

    # Try track search first: "artist title"
    tracks = await search_bandcamp(f"{artist} {title}", item_type="t")
    best_track = None
    best_key = None
    for r in tracks:
        # "or ''" guards against keys that are present but set to None.
        artist_sim = _similarity(r.get("artist") or "", artist)
        title_sim = _similarity(r.get("title") or "", title)

        matches_both = artist_sim >= artist_min and title_sim >= title_min
        if not matches_both and artist_sim < artist_strong:
            continue

        # Rank artist+title matches above artist-only matches so an early
        # "same artist, different track" hit cannot shadow the real track.
        key = (matches_both, artist_sim + title_sim)
        if best_key is None or key > best_key:
            best_track, best_key = r, key

    if best_track is not None:
        return best_track

    # Try artist/band search as fallback
    bands = await search_bandcamp(artist, item_type="b")
    best_band = None
    best_sim = 0.0
    for r in bands:
        # For band results, title IS the band name; also check "artist".
        sim = max(
            _similarity(r.get("title") or "", artist),
            _similarity(r.get("artist") or "", artist),
        )
        if sim >= artist_min and sim > best_sim:
            best_band, best_sim = r, sim

    return best_band
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]: async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
"""Search Bandcamp for tracks, albums, or artists. """Search Bandcamp for tracks, albums, or artists.

View File

@@ -158,7 +158,7 @@ Return ONLY the JSON array, no other text."""
except json.JSONDecodeError: except json.JSONDecodeError:
return [], remaining return [], remaining
from app.services.bandcamp import search_bandcamp from app.services.bandcamp import search_bandcamp_verified
# Save to DB — in bandcamp mode, only keep results verified on Bandcamp # Save to DB — in bandcamp mode, only keep results verified on Bandcamp
recommendations = [] recommendations = []
@@ -169,16 +169,13 @@ Return ONLY the JSON array, no other text."""
bandcamp_url = None bandcamp_url = None
if bandcamp_mode: if bandcamp_mode:
try: try:
results = await search_bandcamp( match = await search_bandcamp_verified(
f"{rec.get('artist', '')} {rec.get('title', '')}", item_type="t" rec.get("artist", ""), rec.get("title", "")
) )
if not results: if match:
# Try artist-only search as fallback bandcamp_url = match.get("bandcamp_url")
results = await search_bandcamp(rec.get("artist", ""), item_type="b")
if results:
bandcamp_url = results[0].get("bandcamp_url")
else: else:
# Not on Bandcamp — skip this recommendation # Not verified on Bandcamp — skip
continue continue
except Exception: except Exception:
continue continue