import re from difflib import SequenceMatcher import httpx AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete" SEARCH_URL = "https://bandcamp.com/search" HEADERS = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", } def _normalize(s: str) -> str: """Normalize string for comparison.""" return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip() def _similarity(a: str, b: str) -> float: """Return similarity ratio between two strings.""" return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio() async def search_bandcamp_verified(artist: str, title: str) -> dict | None: """Search Bandcamp and only return a result if the artist actually matches. Returns the best matching result or None if no good match found. """ # Try track search first: "artist title" results = await search_bandcamp(f"{artist} {title}", item_type="t") for r in results: artist_sim = _similarity(r.get("artist", ""), artist) title_sim = _similarity(r.get("title", ""), title) # Require artist to be a strong match (>0.75) AND title reasonable (>0.5) if artist_sim >= 0.75 and title_sim >= 0.5: return r # Try artist/band search as fallback — very strict matching results = await search_bandcamp(artist, item_type="b") for r in results: # For band results, title IS the band name name = r.get("title", "") or r.get("artist", "") if _similarity(name, artist) >= 0.8: return r return None async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]: """Search Bandcamp for tracks, albums, or artists. item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists. """ # Try autocomplete API first results = await _search_autocomplete(query, item_type) if results: return results # Fall back to HTML scraping return await _search_html(query, item_type) async def _search_autocomplete(query: str, item_type: str) -> list[dict]: """Try the undocumented Bandcamp autocomplete API.""" try: async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client: resp = await client.get(AUTOCOMPLETE_URL, params={"q": query}) if resp.status_code != 200: return [] data = resp.json() results = [] # The autocomplete API returns results grouped by type auto_results = data.get("results", []) for item in auto_results: result_type = item.get("type", "") # Map autocomplete types to our item_type filter if item_type == "t" and result_type != "t": continue if item_type == "a" and result_type != "a": continue if item_type == "b" and result_type != "b": continue results.append({ "title": item.get("name", ""), "artist": item.get("band_name", ""), "art_url": item.get("img", item.get("art_id", None)), "bandcamp_url": item.get("url", ""), "item_type": result_type, }) return results[:20] except Exception: return [] async def _search_html(query: str, item_type: str) -> list[dict]: """Fall back to scraping Bandcamp search results HTML.""" params = {"q": query, "item_type": item_type} try: async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client: resp = await client.get(SEARCH_URL, params=params) if resp.status_code != 200: return [] html = resp.text results = [] # Split by search result items items = re.split(r'