Add Bandcamp discovery via public API (no scraping) - browse new releases by genre tag
This commit is contained in:
@@ -1,58 +1,25 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from app.core.security import get_current_user
|
||||
from app.models.user import User
|
||||
from app.services.bandcamp import search_bandcamp, get_embed_data
|
||||
from app.services.bandcamp import discover_by_tag, get_trending_tags
|
||||
|
||||
router = APIRouter(prefix="/bandcamp", tags=["bandcamp"])
|
||||
|
||||
|
||||
class BandcampResult(BaseModel):
    """A single Bandcamp search result, as returned by /bandcamp/search."""

    # Track or album title; for band ('b') results this is the band name.
    title: str
    # Artist/band name (may be empty when the search subhead had no "by ..." part).
    artist: str
    # Album-art image URL, when one could be extracted from the result.
    art_url: str | None = None
    # Canonical bandcamp.com URL for the item.
    bandcamp_url: str
    # 't' = track, 'a' = album, 'b' = band/artist.
    item_type: str
||||
|
||||
|
||||
class BandcampEmbedResponse(BaseModel):
    """Embed-player data for a Bandcamp URL, as returned by /bandcamp/embed."""

    # Bandcamp EmbeddedPlayer iframe URL built by the bandcamp service.
    embed_url: str
    # Page title (taken from the og:title meta tag).
    title: str
    # Artist name (taken from the og:site_name meta tag).
    artist: str
    # Cover art URL from og:image, if present.
    art_url: str | None = None
|
||||
|
||||
|
||||
@router.get("/search", response_model=list[BandcampResult])
|
||||
async def bandcamp_search(
|
||||
q: str = Query(..., min_length=1),
|
||||
type: str = Query("t", pattern="^[tab]$"),
|
||||
@router.get("/discover")
|
||||
async def bandcamp_discover(
|
||||
tags: str = Query(..., description="Comma-separated tags, e.g. 'indie-rock,shoegaze'"),
|
||||
sort: str = Query("new", description="Sort: new, rec, or pop"),
|
||||
page: int = Query(1),
|
||||
user: User = Depends(get_current_user),
|
||||
):
|
||||
"""Search Bandcamp for tracks, albums, or artists."""
|
||||
results = await search_bandcamp(q.strip(), item_type=type)
|
||||
return [BandcampResult(**r) for r in results]
|
||||
tag_list = [t.strip() for t in tags.split(",") if t.strip()]
|
||||
if not tag_list:
|
||||
return []
|
||||
return await discover_by_tag(tag_list, sort=sort, page=page)
|
||||
|
||||
|
||||
@router.get("/embed", response_model=BandcampEmbedResponse)
|
||||
async def bandcamp_embed(
|
||||
url: str = Query(..., min_length=1),
|
||||
user: User = Depends(get_current_user),
|
||||
):
|
||||
"""Get embed data for a Bandcamp URL."""
|
||||
if "bandcamp.com" not in url:
|
||||
raise HTTPException(status_code=400, detail="Not a valid Bandcamp URL")
|
||||
|
||||
data = await get_embed_data(url.strip())
|
||||
if not data:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Could not extract embed data from this Bandcamp page",
|
||||
)
|
||||
|
||||
return BandcampEmbedResponse(
|
||||
embed_url=data["embed_url"],
|
||||
title=data["title"],
|
||||
artist=data["artist"],
|
||||
art_url=data.get("art_url"),
|
||||
)
|
||||
@router.get("/tags")
|
||||
async def bandcamp_tags(user: User = Depends(get_current_user)):
|
||||
return await get_trending_tags()
|
||||
|
||||
@@ -2,7 +2,7 @@ from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.config import settings
|
||||
from app.api.endpoints import auth, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music
|
||||
from app.api.endpoints import auth, bandcamp, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music
|
||||
|
||||
app = FastAPI(title="Vynl API", version="1.0.0", redirect_slashes=False)
|
||||
|
||||
@@ -22,6 +22,7 @@ app.include_router(recommendations.router, prefix="/api")
|
||||
app.include_router(youtube_music.router, prefix="/api")
|
||||
app.include_router(manual_import.router, prefix="/api")
|
||||
app.include_router(lastfm.router, prefix="/api")
|
||||
app.include_router(bandcamp.router, prefix="/api")
|
||||
app.include_router(profile.router, prefix="/api")
|
||||
|
||||
|
||||
|
||||
@@ -1,255 +1,73 @@
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
"""Bandcamp discovery using their public APIs (no scraping)."""
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete"
|
||||
SEARCH_URL = "https://bandcamp.com/search"
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
|
||||
}
|
||||
|
||||
|
||||
def _normalize(s: str) -> str:
|
||||
"""Normalize string for comparison."""
|
||||
return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip()
|
||||
DIG_DEEPER_URL = "https://bandcamp.com/api/hub/2/dig_deeper"
|
||||
|
||||
|
||||
def _similarity(a: str, b: str) -> float:
|
||||
"""Return similarity ratio between two strings."""
|
||||
return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio()
|
||||
async def discover_by_tag(
    tags: list[str],
    sort: str = "new",
    page: int = 1,
) -> list[dict]:
    """Discover new music on Bandcamp by tag using their public API.

    Args:
        tags: List of genre/tag strings (e.g. ["indie-rock", "shoegaze"])
        sort: "new", "rec", or "pop" (new releases, recommended, popular)
        page: Page number for pagination

    Returns list of releases with: title, artist, art_url, bandcamp_url,
    genre, item_type. Returns [] on a non-200 response.
    """
    async with httpx.AsyncClient(timeout=15, headers=HEADERS) as client:
        resp = await client.post(
            DIG_DEEPER_URL,
            json={
                "filters": {
                    "format": "all",
                    "location": 0,
                    "sort": sort,
                    "tags": tags,
                },
                "page": page,
            },
        )

    if resp.status_code != 200:
        return []

    data = resp.json()
    results = []
    for item in data.get("items", []):
        # Bandcamp returns a numeric art_id; build the CDN thumbnail URL
        # from it (the _16 suffix selects a small image variant).
        art_id = item.get("art_id")
        art_url = f"https://f4.bcbits.com/img/a{art_id}_16.jpg" if art_id else None

        # tralbum_type 'a' = album, anything else treated as a track.
        tralbum_type = item.get("tralbum_type", "a")
        type_path = "album" if tralbum_type == "a" else "track"
        item_url = item.get("tralbum_url", "")

        results.append({
            "title": item.get("title", ""),
            "artist": item.get("artist", ""),
            "art_url": art_url,
            "bandcamp_url": item_url,
            "genre": ", ".join(tags),
            "item_type": type_path,
        })

    return results
||||
async def search_bandcamp_verified(artist: str, title: str) -> dict | None:
    """Search Bandcamp and only return a result if the artist actually matches.

    Returns the best matching result or None if no good match found.
    First tries artist+song, then falls back to artist-only search.
    """
    # Try track search first: "artist title"
    results = await search_bandcamp(f"{artist} {title}", item_type="t")
    for r in results:
        artist_sim = _similarity(r.get("artist", ""), artist)
        title_sim = _similarity(r.get("title", ""), title)
        # Require artist to be a strong match (>0.75) AND title reasonable (>0.5)
        if artist_sim >= 0.75 and title_sim >= 0.5:
            return r

    # Try artist/band search as fallback — return their artist page URL
    results = await search_bandcamp(artist, item_type="b")
    for r in results:
        # For band results, title IS the band name
        name = r.get("title", "") or r.get("artist", "")
        if _similarity(name, artist) >= 0.7:
            return r

    return None
|
||||
|
||||
|
||||
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
    """Search Bandcamp for tracks, albums, or artists.

    item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists.
    Tries the autocomplete API first and falls back to scraping the
    HTML search page when it returns nothing.
    """
    hits = await _search_autocomplete(query, item_type)
    if not hits:
        hits = await _search_html(query, item_type)
    return hits
|
||||
|
||||
|
||||
async def _search_autocomplete(query: str, item_type: str) -> list[dict]:
    """Try the undocumented Bandcamp autocomplete API.

    Returns up to 20 results filtered to item_type ('t'/'a'/'b'), or []
    on any failure so the caller can fall back to HTML scraping.
    """
    try:
        async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client:
            resp = await client.get(AUTOCOMPLETE_URL, params={"q": query})

        if resp.status_code != 200:
            return []

        data = resp.json()
        results = []

        # The autocomplete API returns results grouped by type; keep only
        # the requested type (no filtering for unrecognized item_types).
        for item in data.get("results", []):
            result_type = item.get("type", "")
            if item_type in ("t", "a", "b") and result_type != item_type:
                continue

            # Prefer the direct image URL. Previously this fell back to the
            # raw numeric art_id, which is not a URL and would fail the
            # `art_url: str | None` response model; build the f4.bcbits.com
            # thumbnail URL from it instead (same pattern as discover).
            art_url = item.get("img")
            if not art_url and item.get("art_id"):
                art_url = f"https://f4.bcbits.com/img/a{item['art_id']}_16.jpg"

            results.append({
                "title": item.get("name", ""),
                "artist": item.get("band_name", ""),
                "art_url": art_url,
                "bandcamp_url": item.get("url", ""),
                "item_type": result_type,
            })

        return results[:20]
    except Exception:
        # Best-effort: any network/parse error means "no autocomplete hits".
        return []
||||
async def _search_html(query: str, item_type: str) -> list[dict]:
    """Fall back to scraping Bandcamp search results HTML.

    Best-effort: any network or parse failure yields an empty list.
    Returns at most 20 results shaped like the autocomplete results.
    """
    params = {"q": query, "item_type": item_type}

    # Compile once up front — each pattern is applied to every result item
    # in the loop below.
    heading_re = re.compile(r'class="heading">\s*<a\s+href="([^"]+)"[^>]*>\s*([^<]+)')
    subhead_re = re.compile(r'class="subhead">\s*([^<]+)')
    art_re = re.compile(r'class="art">\s*<img\s+src="([^"]+)"')
    by_re = re.compile(r'by\s+(.+)')

    try:
        async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
            resp = await client.get(SEARCH_URL, params=params)

        if resp.status_code != 200:
            return []

        html = resp.text
        results = []

        # Each hit is an <li class="searchresult ..."> element.
        items = re.split(r'<li\s+class="searchresult\s', html)
        for item_html in items[1:]:  # skip the prelude before the first result
            # Title and URL come from the heading link.
            heading_match = heading_re.search(item_html)
            if not heading_match:
                continue

            url = heading_match.group(1).strip()
            title = heading_match.group(2).strip()

            # Subhead format varies: "by Artist" or "from Album by Artist".
            artist = ""
            subhead_match = subhead_re.search(item_html)
            if subhead_match:
                subhead = subhead_match.group(1).strip()
                by_match = by_re.search(subhead)
                artist = by_match.group(1).strip() if by_match else subhead

            art_match = art_re.search(item_html)
            art_url = art_match.group(1).strip() if art_match else None

            results.append({
                "title": title,
                "artist": artist,
                "art_url": art_url,
                "bandcamp_url": url,
                "item_type": item_type,
            })

            if len(results) >= 20:
                break

        return results
    except Exception:
        return []
|
||||
|
||||
|
||||
async def get_embed_data(bandcamp_url: str) -> dict | None:
    """Get embed info for a Bandcamp URL.

    Fetches the page HTML, extracts the numeric track/album ID, and
    returns a dict with the EmbeddedPlayer iframe URL plus metadata
    (title/artist/art from Open Graph meta tags), or None on failure.
    """
    try:
        async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
            resp = await client.get(bandcamp_url)

        if resp.status_code != 200:
            return None

        html = resp.text

        # Track pages embed as track=<id>; everything else as album=<id>.
        is_track = "/track/" in bandcamp_url

        # The ID appears in several places; try them in order of reliability:
        # data-tralbum-id attribute, og:video meta tag, any EmbeddedPlayer link.
        item_id = None

        tralbum_match = re.search(r'data-tralbum-id="(\d+)"', html)
        if tralbum_match:
            item_id = tralbum_match.group(1)

        if not item_id:
            # og:video meta tag contains the embed URL with the ID.
            og_match = re.search(
                r'<meta\s+property="og:video"\s+content="[^"]*(?:album|track)=(\d+)',
                html,
            )
            if og_match:
                item_id = og_match.group(1)

        if not item_id:
            # Last resort: an embedded player link anywhere in the page.
            embed_match = re.search(
                r'EmbeddedPlayer/(?:album|track)=(\d+)', html
            )
            if embed_match:
                item_id = embed_match.group(1)

        if not item_id:
            return None

        # Build the iframe URL (dark background, purple link color).
        id_type = "track" if is_track else "album"
        embed_url = (
            f"https://bandcamp.com/EmbeddedPlayer/"
            f"{id_type}={item_id}/size=large/"
            f"bgcol=1C1917/linkcol=7C3AED/"
            f"tracklist=false/transparent=true/"
        )

        # Metadata from Open Graph meta tags — all best-effort.
        title = ""
        title_match = re.search(
            r'<meta\s+property="og:title"\s+content="([^"]+)"', html
        )
        if title_match:
            title = title_match.group(1).strip()

        artist = ""
        artist_match = re.search(
            r'<meta\s+property="og:site_name"\s+content="([^"]+)"', html
        )
        if artist_match:
            artist = artist_match.group(1).strip()

        art_url = None
        art_match = re.search(
            r'<meta\s+property="og:image"\s+content="([^"]+)"', html
        )
        if art_match:
            art_url = art_match.group(1).strip()

        return {
            "embed_url": embed_url,
            "title": title,
            "artist": artist,
            "art_url": art_url,
            "item_id": item_id,
            "item_type": id_type,
        }
    except Exception:
        return None
|
||||
|
||||
|
||||
async def get_trending_tags() -> list[str]:
    """Return common Bandcamp genre tags usable with discover_by_tag."""
    curated = [
        "indie-rock",
        "electronic",
        "hip-hop-rap",
        "ambient",
        "punk",
        "experimental",
        "folk",
        "jazz",
        "metal",
        "pop",
        "r-b-soul",
        "shoegaze",
        "post-punk",
        "synthwave",
        "lo-fi",
        "dream-pop",
        "indie-pop",
        "psychedelic",
        "garage-rock",
        "emo",
    ]
    return curated
|
||||
|
||||
Reference in New Issue
Block a user