Implement Bandcamp search service with autocomplete API and HTML scraping fallback. Add /api/bandcamp/search and /api/bandcamp/embed endpoints. Create Listening Room page with search, embedded player, and queue management. Add navigation entry and Bandcamp link on recommendation cards.
219 lines
6.7 KiB
Python
219 lines
6.7 KiB
Python
import re
from html import unescape

import httpx
|
|
|
|
|
|
# Undocumented JSON endpoint that the search helpers try first.
AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete"
# Public HTML search page, scraped when the autocomplete call yields nothing.
SEARCH_URL = "https://bandcamp.com/search"

# Browser-like User-Agent attached to every outgoing request in this module.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
}
|
|
|
|
|
|
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
    """Search Bandcamp for tracks, albums, or artists.

    The autocomplete API is tried first; the HTML search page is scraped
    only when the API yields no results.

    Args:
        query: Free-text search string. A blank or whitespace-only query
            returns an empty list without issuing any network request.
        item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists.

    Returns:
        A list of result dicts with the keys ``title``, ``artist``,
        ``art_url``, ``bandcamp_url`` and ``item_type``; empty when neither
        lookup produced anything.
    """
    # Nothing meaningful can match a blank query, so skip both round-trips.
    if not query or not query.strip():
        return []

    # Try autocomplete API first
    results = await _search_autocomplete(query, item_type)
    if results:
        return results

    # Fall back to HTML scraping
    return await _search_html(query, item_type)
|
|
|
|
|
|
async def _search_autocomplete(query: str, item_type: str) -> list[dict]:
    """Query the undocumented Bandcamp autocomplete API.

    Args:
        query: Search text, sent as the ``q`` request parameter.
        item_type: 't', 'a' or 'b' keeps only entries whose ``type`` field
            equals it; any other value leaves the entries unfiltered.

    Returns:
        At most 20 result dicts (``title``, ``artist``, ``art_url``,
        ``bandcamp_url``, ``item_type``). An empty list is returned on a
        non-200 response or on any error.
    """
    try:
        async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client:
            resp = await client.get(AUTOCOMPLETE_URL, params={"q": query})

        if resp.status_code != 200:
            return []

        return _parse_autocomplete_payload(resp.json(), item_type)
    except Exception:
        # Deliberately best-effort: network errors, invalid JSON, or an
        # unexpected payload all collapse to "no results".
        return []


def _parse_autocomplete_payload(
    data: object, item_type: str, limit: int = 20
) -> list[dict]:
    """Convert a decoded autocomplete payload into normalized result dicts."""
    if not isinstance(data, dict):
        return []

    entries = data.get("results")
    if not isinstance(entries, list):
        return []

    # Only the three known type codes act as a filter.
    filter_by_type = item_type in ("t", "a", "b")

    results: list[dict] = []
    for item in entries:
        # Skip malformed entries instead of letting one bad element raise
        # and discard every other result.
        if not isinstance(item, dict):
            continue

        result_type = item.get("type") or ""
        if filter_by_type and result_type != item_type:
            continue

        results.append({
            # ``or ""`` keeps JSON nulls from leaking through as None.
            "title": item.get("name") or "",
            "artist": item.get("band_name") or "",
            # NOTE(review): the "art_id" fallback looks like an identifier
            # rather than a URL — confirm what consumers of art_url expect.
            "art_url": item.get("img", item.get("art_id")),
            "bandcamp_url": item.get("url") or "",
            "item_type": result_type,
        })
        if len(results) >= limit:
            break

    return results
|
|
|
|
|
|
async def _search_html(query: str, item_type: str) -> list[dict]:
    """Fall back to scraping Bandcamp search results HTML.

    Args:
        query: Search text, sent as the ``q`` request parameter.
        item_type: Sent as the ``item_type`` request parameter and copied
            into every returned result.

    Returns:
        At most 20 result dicts (``title``, ``artist``, ``art_url``,
        ``bandcamp_url``, ``item_type``) with HTML entities decoded. An
        empty list is returned on a non-200 response or on any error.
    """
    params = {"q": query, "item_type": item_type}
    try:
        async with httpx.AsyncClient(
            timeout=15, headers=HEADERS, follow_redirects=True
        ) as client:
            resp = await client.get(SEARCH_URL, params=params)

        if resp.status_code != 200:
            return []

        return _parse_search_results(resp.text, item_type)
    except Exception:
        # Deliberately best-effort: any network or parsing failure is
        # reported as "no results".
        return []


def _parse_search_results(page: str, item_type: str, limit: int = 20) -> list[dict]:
    """Extract up to ``limit`` result dicts from a search results page."""
    results: list[dict] = []

    # Element 0 of the split is the markup before the first result item.
    for chunk in re.split(r'<li\s+class="searchresult\s', page)[1:]:
        heading = re.search(
            r'class="heading">\s*<a\s+href="([^"]+)"[^>]*>\s*([^<]+)',
            chunk,
        )
        if not heading:
            continue

        subhead = re.search(r'class="subhead">\s*([^<]+)', chunk)
        art = re.search(r'class="art">\s*<img\s+src="([^"]+)"', chunk)

        # Attribute values and text nodes are entity-encoded in the page
        # source (e.g. "&amp;"), so decode them before returning.
        results.append({
            "title": unescape(heading.group(2)).strip(),
            "artist": _artist_from_subhead(unescape(subhead.group(1))) if subhead else "",
            "art_url": unescape(art.group(1)).strip() if art else None,
            "bandcamp_url": unescape(heading.group(1)).strip(),
            "item_type": item_type,
        })

        if len(results) >= limit:
            break

    return results


def _artist_from_subhead(subhead: str) -> str:
    """Return the artist named in a subhead such as "from Album by Artist"."""
    subhead = subhead.strip()
    # The word boundary stops "by" from matching inside a word ("Baby Blue").
    # NOTE(review): the first standalone "by" still wins, so an album title
    # containing the word "by" can be mis-split — confirm against real markup.
    by_match = re.search(r'\bby\s+(.+)', subhead)
    if by_match:
        return by_match.group(1).strip()
    return subhead
|
|
|
|
|
|
async def get_embed_data(bandcamp_url: str) -> dict | None:
|
|
"""Get embed info for a Bandcamp URL.
|
|
|
|
Fetches the page HTML, extracts the track/album ID, and returns
|
|
the embed iframe URL along with metadata.
|
|
"""
|
|
try:
|
|
async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
|
|
resp = await client.get(bandcamp_url)
|
|
|
|
if resp.status_code != 200:
|
|
return None
|
|
|
|
html = resp.text
|
|
|
|
# Determine if this is a track or album URL
|
|
is_track = "/track/" in bandcamp_url
|
|
|
|
# Try to extract the ID from meta tags or data attributes
|
|
# Look for: <meta property="og:video" content="...album=12345..." />
|
|
# or data-tralbum-id="12345"
|
|
item_id = None
|
|
|
|
tralbum_match = re.search(r'data-tralbum-id="(\d+)"', html)
|
|
if tralbum_match:
|
|
item_id = tralbum_match.group(1)
|
|
|
|
if not item_id:
|
|
# Try og:video meta tag which contains embed URL with ID
|
|
og_match = re.search(
|
|
r'<meta\s+property="og:video"\s+content="[^"]*(?:album|track)=(\d+)',
|
|
html,
|
|
)
|
|
if og_match:
|
|
item_id = og_match.group(1)
|
|
|
|
if not item_id:
|
|
# Try the embedded player link in the page
|
|
embed_match = re.search(
|
|
r'EmbeddedPlayer/(?:album|track)=(\d+)', html
|
|
)
|
|
if embed_match:
|
|
item_id = embed_match.group(1)
|
|
|
|
if not item_id:
|
|
return None
|
|
|
|
# Build embed URL
|
|
id_type = "track" if is_track else "album"
|
|
embed_url = (
|
|
f"https://bandcamp.com/EmbeddedPlayer/"
|
|
f"{id_type}={item_id}/size=large/"
|
|
f"bgcol=1C1917/linkcol=7C3AED/"
|
|
f"tracklist=false/transparent=true/"
|
|
)
|
|
|
|
# Extract title from og:title
|
|
title = ""
|
|
title_match = re.search(
|
|
r'<meta\s+property="og:title"\s+content="([^"]+)"', html
|
|
)
|
|
if title_match:
|
|
title = title_match.group(1).strip()
|
|
|
|
# Extract artist
|
|
artist = ""
|
|
artist_match = re.search(
|
|
r'<meta\s+property="og:site_name"\s+content="([^"]+)"', html
|
|
)
|
|
if artist_match:
|
|
artist = artist_match.group(1).strip()
|
|
|
|
# Extract art
|
|
art_url = None
|
|
art_match = re.search(
|
|
r'<meta\s+property="og:image"\s+content="([^"]+)"', html
|
|
)
|
|
if art_match:
|
|
art_url = art_match.group(1).strip()
|
|
|
|
return {
|
|
"embed_url": embed_url,
|
|
"title": title,
|
|
"artist": artist,
|
|
"art_url": art_url,
|
|
"item_id": item_id,
|
|
"item_type": id_type,
|
|
}
|
|
except Exception:
|
|
return None
|