Verify Bandcamp results match the actual artist/track before including
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -11,6 +12,46 @@ HEADERS = {
|
||||
}
|
||||
|
||||
|
||||
def _normalize(s: str) -> str:
    """Normalize a string for fuzzy comparison.

    The text is Unicode-decomposed (NFKD) and stripped of combining marks so
    accented letters compare equal to their base letters, case-folded,
    stripped of punctuation/symbols (including underscores), and has runs of
    whitespace collapsed to a single space.

    Unlike an ASCII-only ``[^a-z0-9\\s]`` filter, letters and digits from any
    script are kept, so non-Latin names do not normalize to an empty string.

    Args:
        s: Raw text such as an artist name or track title.

    Returns:
        The normalized text; ``""`` when nothing comparable remains.
    """
    # Local import: the module's top-level import block does not provide it.
    import unicodedata

    decomposed = unicodedata.normalize("NFKD", s)
    # Drop combining marks so "ö" (o + U+0308) compares equal to "o".
    base_chars = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # \w is Unicode-aware for str patterns; "_" is removed explicitly because
    # \w would otherwise keep it.
    cleaned = re.sub(r"[^\w\s]|_", "", base_chars.casefold())
    # Removing punctuation can leave double spaces ("a - b" -> "a  b").
    return " ".join(cleaned.split())
|
||||
|
||||
|
||||
def _similarity(a: str, b: str) -> float:
    """Return the similarity ratio between two strings after normalization.

    Both inputs are passed through ``_normalize`` and compared with
    ``difflib.SequenceMatcher``.

    Args:
        a: First string.
        b: Second string.

    Returns:
        A float in ``[0.0, 1.0]``. ``0.0`` is returned when either string is
        empty after normalization.
    """
    norm_a = _normalize(a)
    norm_b = _normalize(b)
    # SequenceMatcher.ratio() reports 1.0 for two empty sequences, which would
    # make a missing field look like a perfect match. Treat "nothing to
    # compare" as no match instead.
    if not norm_a or not norm_b:
        return 0.0
    return SequenceMatcher(None, norm_a, norm_b).ratio()
|
||||
|
||||
|
||||
async def search_bandcamp_verified(artist: str, title: str) -> dict | None:
    """Search Bandcamp and return the best result whose artist actually matches.

    Two searches are tried in order:

    1. A track search for ``"{artist} {title}"``. Results whose artist
       similarity is >= 0.6 and title similarity is >= 0.4 are preferred,
       ranked by combined similarity. If none qualify, a result whose artist
       similarity is >= 0.8 is accepted even when the title differs (a
       different track by the same artist).
    2. A band search for ``artist``. A result is accepted when either its
       ``"title"`` or its ``"artist"`` field has similarity >= 0.6 to
       ``artist``.

    Within each tier the highest-scoring result wins; ties keep the earliest
    result, i.e. the search's own ordering.

    Args:
        artist: Artist name to verify results against.
        title: Track title to verify results against.

    Returns:
        The best matching result dict, or ``None`` if no result passes the
        thresholds or ``artist`` is blank.
    """
    # Without an artist there is nothing to verify against.
    if not artist or not artist.strip():
        return None

    min_artist_sim = 0.6     # artist must be a reasonably strong match
    min_title_sim = 0.4      # title only needs to be plausible
    strong_artist_sim = 0.8  # artist close enough to accept regardless of title

    # Try track search first: "artist title"
    track_results = await search_bandcamp(f"{artist} {title}", item_type="t")
    best_track, best_track_score = None, 0.0
    best_same_artist, best_same_artist_score = None, 0.0
    for r in track_results:
        # `or ""` also covers keys that are present but set to None.
        artist_sim = _similarity(r.get("artist") or "", artist)
        title_sim = _similarity(r.get("title") or "", title)
        if artist_sim >= min_artist_sim and title_sim >= min_title_sim:
            combined = artist_sim + title_sim
            if combined > best_track_score:
                best_track, best_track_score = r, combined
        elif artist_sim >= strong_artist_sim and artist_sim > best_same_artist_score:
            best_same_artist, best_same_artist_score = r, artist_sim

    # A result matching both artist and title always beats an artist-only
    # match, regardless of where each appeared in the result list.
    if best_track is not None:
        return best_track
    if best_same_artist is not None:
        return best_same_artist

    # Try artist/band search as fallback
    band_results = await search_bandcamp(artist, item_type="b")
    best_band, best_band_score = None, 0.0
    for r in band_results:
        # For band results, title IS the band name; "artist" is checked too.
        name_sim = max(
            _similarity(r.get("title") or "", artist),
            _similarity(r.get("artist") or "", artist),
        )
        if name_sim >= min_artist_sim and name_sim > best_band_score:
            best_band, best_band_score = r, name_sim

    return best_band
|
||||
|
||||
|
||||
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
|
||||
"""Search Bandcamp for tracks, albums, or artists.
|
||||
|
||||
|
||||
@@ -158,7 +158,7 @@ Return ONLY the JSON array, no other text."""
|
||||
except json.JSONDecodeError:
|
||||
return [], remaining
|
||||
|
||||
from app.services.bandcamp import search_bandcamp
|
||||
from app.services.bandcamp import search_bandcamp_verified
|
||||
|
||||
# Save to DB — in bandcamp mode, only keep results verified on Bandcamp
|
||||
recommendations = []
|
||||
@@ -169,16 +169,13 @@ Return ONLY the JSON array, no other text."""
|
||||
bandcamp_url = None
|
||||
if bandcamp_mode:
|
||||
try:
|
||||
results = await search_bandcamp(
|
||||
f"{rec.get('artist', '')} {rec.get('title', '')}", item_type="t"
|
||||
match = await search_bandcamp_verified(
|
||||
rec.get("artist", ""), rec.get("title", "")
|
||||
)
|
||||
if not results:
|
||||
# Try artist-only search as fallback
|
||||
results = await search_bandcamp(rec.get("artist", ""), item_type="b")
|
||||
if results:
|
||||
bandcamp_url = results[0].get("bandcamp_url")
|
||||
if match:
|
||||
bandcamp_url = match.get("bandcamp_url")
|
||||
else:
|
||||
# Not on Bandcamp — skip this recommendation
|
||||
# Not verified on Bandcamp — skip
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user