Add Bandcamp search and Listening Room page
Implement Bandcamp search service with autocomplete API and HTML scraping fallback. Add /api/bandcamp/search and /api/bandcamp/embed endpoints. Create Listening Room page with search, embedded player, and queue management. Add navigation entry and Bandcamp link on recommendation cards.
This commit is contained in:
218
backend/app/services/bandcamp.py
Normal file
218
backend/app/services/bandcamp.py
Normal file
@@ -0,0 +1,218 @@
|
||||
import re
from html import unescape

import httpx
|
||||
|
||||
|
||||
# Undocumented Bandcamp autocomplete endpoint (JSON); tried first by
# search_bandcamp before falling back to scraping SEARCH_URL.
AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete"
# Public Bandcamp search page, scraped when the autocomplete API yields nothing.
SEARCH_URL = "https://bandcamp.com/search"

# Browser-like User-Agent sent on every request.
# NOTE(review): presumably Bandcamp serves different markup (or rejects)
# default HTTP-client agents — confirm before changing.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
}
|
||||
|
||||
|
||||
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
    """Search Bandcamp for tracks, albums, or artists.

    item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists.

    Tries the structured autocomplete API first; when that yields nothing
    (including on any error), falls back to scraping the public search page.
    """
    hits = await _search_autocomplete(query, item_type)
    return hits if hits else await _search_html(query, item_type)
|
||||
|
||||
|
||||
async def _search_autocomplete(query: str, item_type: str) -> list[dict]:
    """Query the undocumented Bandcamp autocomplete API.

    Returns up to 20 result dicts; returns [] on any error or non-200
    response so the caller can fall back to HTML scraping.
    """
    try:
        async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client:
            resp = await client.get(AUTOCOMPLETE_URL, params={"q": query})

        if resp.status_code != 200:
            return []

        payload = resp.json()
        hits: list[dict] = []

        # The API tags each result with its type ('t'/'a'/'b'); drop entries
        # that don't match the requested filter. (Single condition equivalent
        # to the original three per-type checks: unknown item_type values
        # pass everything through, exactly as before.)
        for entry in payload.get("results", []):
            entry_type = entry.get("type", "")
            if item_type in ("t", "a", "b") and entry_type != item_type:
                continue

            hits.append({
                "title": entry.get("name", ""),
                "artist": entry.get("band_name", ""),
                # NOTE(review): falls back to the raw art_id when no image
                # URL is present — presumably the frontend handles both
                # forms; confirm against the consumer.
                "art_url": entry.get("img", entry.get("art_id", None)),
                "bandcamp_url": entry.get("url", ""),
                "item_type": entry_type,
            })

        return hits[:20]
    except Exception:
        # Best-effort: any failure just triggers the scraping fallback.
        return []
|
||||
|
||||
|
||||
async def _search_html(query: str, item_type: str) -> list[dict]:
    """Fall back to scraping the Bandcamp search results HTML.

    Returns up to 20 result dicts with the same shape as
    _search_autocomplete; returns [] on any network or parse failure.
    """
    params = {"q": query, "item_type": item_type}

    # Compile the per-item patterns once, outside the loop.
    heading_re = re.compile(
        r'class="heading">\s*<a\s+href="([^"]+)"[^>]*>\s*([^<]+)'
    )
    subhead_re = re.compile(r'class="subhead">\s*([^<]+)')
    # \b keeps this from matching the 'by' inside a word (e.g. the original
    # r'by\s+(.+)' turned the subhead "Abby music" into artist "music").
    by_re = re.compile(r'\bby\s+(.+)')
    art_re = re.compile(r'class="art">\s*<img\s+src="([^"]+)"')

    try:
        async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
            resp = await client.get(SEARCH_URL, params=params)

        if resp.status_code != 200:
            return []

        html = resp.text
        results: list[dict] = []

        # Each hit is an <li class="searchresult ..."> element; split on the
        # opening tag and walk the fragments.
        items = re.split(r'<li\s+class="searchresult\s', html)
        for item_html in items[1:]:  # items[0] is everything before the first hit
            # Title and URL come from the heading link; skip malformed items.
            heading_match = heading_re.search(item_html)
            if not heading_match:
                continue

            url = heading_match.group(1).strip()
            title = heading_match.group(2).strip()

            # Subhead format varies: "by Artist" or "from Album by Artist".
            artist = ""
            subhead_match = subhead_re.search(item_html)
            if subhead_match:
                subhead = subhead_match.group(1).strip()
                by_match = by_re.search(subhead)
                artist = by_match.group(1).strip() if by_match else subhead

            art_match = art_re.search(item_html)
            art_url = art_match.group(1).strip() if art_match else None

            results.append({
                "title": title,
                "artist": artist,
                "art_url": art_url,
                "bandcamp_url": url,
                "item_type": item_type,
            })

            if len(results) >= 20:
                break

        return results
    except Exception:
        # Best-effort: scraping failures degrade to "no results" rather
        # than propagating to the API layer.
        return []
|
||||
|
||||
|
||||
def _og_meta(page: str, prop: str) -> str | None:
    """Return the entity-unescaped content of an Open Graph <meta> tag.

    NOTE(review): assumes the property attribute precedes content, as it
    does on Bandcamp pages — confirm if markup changes.
    """
    match = re.search(
        rf'<meta\s+property="{re.escape(prop)}"\s+content="([^"]+)"', page
    )
    return unescape(match.group(1).strip()) if match else None


async def get_embed_data(bandcamp_url: str) -> dict | None:
    """Get embed info for a Bandcamp URL.

    Fetches the page HTML, extracts the track/album ID, and returns the
    embed iframe URL along with metadata; returns None when the page cannot
    be fetched or no ID can be found.
    """
    try:
        async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
            resp = await client.get(bandcamp_url)

        if resp.status_code != 200:
            return None

        page = resp.text

        # Track pages live under /track/; everything else is treated as an album.
        id_type = "track" if "/track/" in bandcamp_url else "album"

        # The numeric item ID appears in several places; try them in order:
        # data-tralbum-id attribute, og:video embed URL, inline
        # EmbeddedPlayer links.
        item_id = None

        tralbum_match = re.search(r'data-tralbum-id="(\d+)"', page)
        if tralbum_match:
            item_id = tralbum_match.group(1)

        if not item_id:
            og_match = re.search(
                r'<meta\s+property="og:video"\s+content="[^"]*(?:album|track)=(\d+)',
                page,
            )
            if og_match:
                item_id = og_match.group(1)

        if not item_id:
            embed_match = re.search(
                r'EmbeddedPlayer/(?:album|track)=(\d+)', page
            )
            if embed_match:
                item_id = embed_match.group(1)

        if not item_id:
            return None

        # Build the player iframe URL (bgcol/linkcol match the app theme).
        embed_url = (
            f"https://bandcamp.com/EmbeddedPlayer/"
            f"{id_type}={item_id}/size=large/"
            f"bgcol=1C1917/linkcol=7C3AED/"
            f"tracklist=false/transparent=true/"
        )

        # og: meta values are HTML-attribute-escaped; _og_meta unescapes
        # entities (&amp; etc.) that were previously passed through raw.
        title = _og_meta(page, "og:title") or ""
        artist = _og_meta(page, "og:site_name") or ""
        art_url = _og_meta(page, "og:image")

        return {
            "embed_url": embed_url,
            "title": title,
            "artist": artist,
            "art_url": art_url,
            "item_id": item_id,
            "item_type": id_type,
        }
    except Exception:
        # Best-effort: any failure is reported as "no embed available".
        return None
|
||||
Reference in New Issue
Block a user