Verify Bandcamp results match the actual artist/track before including
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
@@ -11,6 +12,46 @@ HEADERS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize(s: str) -> str:
|
||||||
|
"""Normalize string for comparison."""
|
||||||
|
return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _similarity(a: str, b: str) -> float:
    """Return a similarity ratio in ``[0.0, 1.0]`` between two strings.

    Both inputs are passed through ``_normalize`` and compared with
    ``difflib.SequenceMatcher``.

    Args:
        a: First string; ``None`` is treated as empty.
        b: Second string; ``None`` is treated as empty.

    Returns:
        ``0.0`` if either string is empty after normalization, otherwise
        the ``SequenceMatcher`` ratio of the normalized strings.
    """
    left = _normalize(a or "")
    right = _normalize(b or "")
    # SequenceMatcher reports 1.0 for two empty sequences, which would turn
    # "both names missing" into a perfect match. Missing data is no match.
    if not left or not right:
        return 0.0
    return SequenceMatcher(None, left, right).ratio()
|
||||||
|
|
||||||
|
|
||||||
|
async def search_bandcamp_verified(artist: str, title: str) -> dict | None:
    """Search Bandcamp and return a result only if the artist actually matches.

    A track search for ``"{artist} {title}"`` is tried first. A track result
    qualifies when its artist is a strong match (similarity >= 0.6) and its
    title is a reasonable match (>= 0.4), or when the artist alone is a very
    close match (>= 0.8), i.e. a different track by the same artist. Among
    qualifying results the one with the highest combined artist + title
    similarity is returned; ties keep the search order.

    If no track qualifies, a band search for *artist* is tried and the
    result whose name best matches *artist* (>= 0.6) is returned.

    Args:
        artist: Artist name to verify against. If empty, no search is made.
        title: Track title; may be empty.

    Returns:
        The best matching result dict as produced by ``search_bandcamp``,
        or ``None`` if nothing passes the thresholds.
    """
    # Without an artist there is nothing to verify results against.
    if not artist or not artist.strip():
        return None
    title = title or ""

    min_artist = 0.6     # artist similarity required for any match
    min_title = 0.4      # title similarity required alongside min_artist
    strong_artist = 0.8  # artist similarity accepted regardless of title

    # Try track search first: "artist title"
    results = await search_bandcamp(f"{artist} {title}", item_type="t")
    best = None
    best_score = -1.0
    for r in results:
        # `or ""` also covers keys that are present but set to None.
        artist_sim = _similarity(r.get("artist") or "", artist)
        title_sim = _similarity(r.get("title") or "", title)
        full_match = artist_sim >= min_artist and title_sim >= min_title
        if not (full_match or artist_sim >= strong_artist):
            continue
        # Keep scanning instead of returning the first hit, so a later exact
        # artist + title match beats an earlier artist-only match.
        score = artist_sim + title_sim
        if score > best_score:
            best, best_score = r, score
    if best is not None:
        return best

    # Try artist/band search as fallback
    results = await search_bandcamp(artist, item_type="b")
    best_score = 0.0
    for r in results:
        # For band results the name is expected in "title"; "artist" is
        # checked as well in case the result carries it there instead.
        name_sim = max(
            _similarity(r.get("title") or "", artist),
            _similarity(r.get("artist") or "", artist),
        )
        if name_sim >= min_artist and name_sim > best_score:
            best, best_score = r, name_sim

    return best
|
||||||
|
|
||||||
|
|
||||||
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
|
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
|
||||||
"""Search Bandcamp for tracks, albums, or artists.
|
"""Search Bandcamp for tracks, albums, or artists.
|
||||||
|
|
||||||
|
|||||||
@@ -158,7 +158,7 @@ Return ONLY the JSON array, no other text."""
|
|||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return [], remaining
|
return [], remaining
|
||||||
|
|
||||||
from app.services.bandcamp import search_bandcamp
|
from app.services.bandcamp import search_bandcamp_verified
|
||||||
|
|
||||||
# Save to DB — in bandcamp mode, only keep results verified on Bandcamp
|
# Save to DB — in bandcamp mode, only keep results verified on Bandcamp
|
||||||
recommendations = []
|
recommendations = []
|
||||||
@@ -169,16 +169,13 @@ Return ONLY the JSON array, no other text."""
|
|||||||
bandcamp_url = None
|
bandcamp_url = None
|
||||||
if bandcamp_mode:
|
if bandcamp_mode:
|
||||||
try:
|
try:
|
||||||
results = await search_bandcamp(
|
match = await search_bandcamp_verified(
|
||||||
f"{rec.get('artist', '')} {rec.get('title', '')}", item_type="t"
|
rec.get("artist", ""), rec.get("title", "")
|
||||||
)
|
)
|
||||||
if not results:
|
if match:
|
||||||
# Try artist-only search as fallback
|
bandcamp_url = match.get("bandcamp_url")
|
||||||
results = await search_bandcamp(rec.get("artist", ""), item_type="b")
|
|
||||||
if results:
|
|
||||||
bandcamp_url = results[0].get("bandcamp_url")
|
|
||||||
else:
|
else:
|
||||||
# Not on Bandcamp — skip this recommendation
|
# Not verified on Bandcamp — skip
|
||||||
continue
|
continue
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user