diff --git a/backend/app/api/endpoints/bandcamp.py b/backend/app/api/endpoints/bandcamp.py index 1635031..d369aab 100644 --- a/backend/app/api/endpoints/bandcamp.py +++ b/backend/app/api/endpoints/bandcamp.py @@ -1,58 +1,25 @@ -from fastapi import APIRouter, Depends, HTTPException, Query -from pydantic import BaseModel +from fastapi import APIRouter, Depends, Query from app.core.security import get_current_user from app.models.user import User -from app.services.bandcamp import search_bandcamp, get_embed_data +from app.services.bandcamp import discover_by_tag, get_trending_tags router = APIRouter(prefix="/bandcamp", tags=["bandcamp"]) -class BandcampResult(BaseModel): - title: str - artist: str - art_url: str | None = None - bandcamp_url: str - item_type: str - - -class BandcampEmbedResponse(BaseModel): - embed_url: str - title: str - artist: str - art_url: str | None = None - - -@router.get("/search", response_model=list[BandcampResult]) -async def bandcamp_search( - q: str = Query(..., min_length=1), - type: str = Query("t", pattern="^[tab]$"), +@router.get("/discover") +async def bandcamp_discover( + tags: str = Query(..., description="Comma-separated tags, e.g. 'indie-rock,shoegaze'"), + sort: str = Query("new", description="Sort: new, rec, or pop"), + page: int = Query(1), user: User = Depends(get_current_user), ): - """Search Bandcamp for tracks, albums, or artists.""" - results = await search_bandcamp(q.strip(), item_type=type) - return [BandcampResult(**r) for r in results] + tag_list = [t.strip() for t in tags.split(",") if t.strip()] + if not tag_list: + return [] + return await discover_by_tag(tag_list, sort=sort, page=page) -@router.get("/embed", response_model=BandcampEmbedResponse) -async def bandcamp_embed( - url: str = Query(..., min_length=1), - user: User = Depends(get_current_user), -): - """Get embed data for a Bandcamp URL.""" - if "bandcamp.com" not in url: - raise HTTPException(status_code=400, detail="Not a valid Bandcamp URL") - - data = await get_embed_data(url.strip()) - if not data: - raise HTTPException( - status_code=404, - detail="Could not extract embed data from this Bandcamp page", - ) - - return BandcampEmbedResponse( - embed_url=data["embed_url"], - title=data["title"], - artist=data["artist"], - art_url=data.get("art_url"), - ) +@router.get("/tags") +async def bandcamp_tags(user: User = Depends(get_current_user)): + return await get_trending_tags() diff --git a/backend/app/main.py b/backend/app/main.py index 6f4611e..5f1910e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -2,7 +2,7 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.core.config import settings -from app.api.endpoints import auth, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music +from app.api.endpoints import auth, bandcamp, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music app = FastAPI(title="Vynl API", version="1.0.0", redirect_slashes=False) @@ -22,6 +22,7 @@ app.include_router(recommendations.router, prefix="/api") app.include_router(youtube_music.router, prefix="/api") app.include_router(manual_import.router, prefix="/api") app.include_router(lastfm.router, prefix="/api") +app.include_router(bandcamp.router, prefix="/api") app.include_router(profile.router, prefix="/api") diff --git a/backend/app/services/bandcamp.py b/backend/app/services/bandcamp.py index efc4183..1ecefb9 100644 --- a/backend/app/services/bandcamp.py +++ b/backend/app/services/bandcamp.py @@ -1,255 +1,73 @@ -import re -from difflib import SequenceMatcher +"""Bandcamp discovery using their public APIs (no scraping).""" import httpx - -AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete" -SEARCH_URL = "https://bandcamp.com/search" - HEADERS = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", } - -def _normalize(s: str) -> str: - """Normalize string for comparison.""" - return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip() +DIG_DEEPER_URL = "https://bandcamp.com/api/hub/2/dig_deeper" -def _similarity(a: str, b: str) -> float: - """Return similarity ratio between two strings.""" - return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio() +async def discover_by_tag( + tags: list[str], + sort: str = "new", + page: int = 1, +) -> list[dict]: + """Discover new music on Bandcamp by tag using their public API. + Args: + tags: List of genre/tag strings (e.g. ["indie-rock", "shoegaze"]) + sort: "new", "rec", or "pop" (new releases, recommended, popular) + page: Page number for pagination -async def search_bandcamp_verified(artist: str, title: str) -> dict | None: - """Search Bandcamp and only return a result if the artist actually matches. - - Returns the best matching result or None if no good match found. - First tries artist+song, then falls back to artist-only search. + Returns list of releases with: title, artist, art_url, bandcamp_url, genre, item_type """ - # Try track search first: "artist title" - results = await search_bandcamp(f"{artist} {title}", item_type="t") - for r in results: - artist_sim = _similarity(r.get("artist", ""), artist) - title_sim = _similarity(r.get("title", ""), title) - # Require artist to be a strong match (>0.75) AND title reasonable (>0.5) - if artist_sim >= 0.75 and title_sim >= 0.5: - return r + async with httpx.AsyncClient(timeout=15, headers=HEADERS) as client: + resp = await client.post( + DIG_DEEPER_URL, + json={ + "filters": { + "format": "all", + "location": 0, + "sort": sort, + "tags": tags, + }, + "page": page, + }, + ) - # Try artist/band search as fallback — return their artist page URL - results = await search_bandcamp(artist, item_type="b") - for r in results: - # For band results, title IS the band name - name = r.get("title", "") or r.get("artist", "") - if _similarity(name, artist) >= 0.7: - return r - - return None - - -async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]: - """Search Bandcamp for tracks, albums, or artists. - - item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists. - """ - # Try autocomplete API first - results = await _search_autocomplete(query, item_type) - if results: - return results - - # Fall back to HTML scraping - return await _search_html(query, item_type) - - -async def _search_autocomplete(query: str, item_type: str) -> list[dict]: - """Try the undocumented Bandcamp autocomplete API.""" - try: - async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client: - resp = await client.get(AUTOCOMPLETE_URL, params={"q": query}) - - if resp.status_code != 200: - return [] - - data = resp.json() - results = [] - - # The autocomplete API returns results grouped by type - auto_results = data.get("results", []) - for item in auto_results: - result_type = item.get("type", "") - - # Map autocomplete types to our item_type filter - if item_type == "t" and result_type != "t": - continue - if item_type == "a" and result_type != "a": - continue - if item_type == "b" and result_type != "b": - continue - - results.append({ - "title": item.get("name", ""), - "artist": item.get("band_name", ""), - "art_url": item.get("img", item.get("art_id", None)), - "bandcamp_url": item.get("url", ""), - "item_type": result_type, - }) - - return results[:20] - except Exception: + if resp.status_code != 200: return [] + data = resp.json() + results = [] -async def _search_html(query: str, item_type: str) -> list[dict]: - """Fall back to scraping Bandcamp search results HTML.""" - params = {"q": query, "item_type": item_type} - try: - async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client: - resp = await client.get(SEARCH_URL, params=params) + for item in data.get("items", []): + art_id = item.get("art_id") + art_url = f"https://f4.bcbits.com/img/a{art_id}_16.jpg" if art_id else None - if resp.status_code != 200: - return [] + tralbum_type = item.get("tralbum_type", "a") + type_path = "album" if tralbum_type == "a" else "track" + item_url = item.get("tralbum_url", "") - html = resp.text - results = [] - - # Split by search result items - items = re.split(r'
Browse new independent releases
+Select some genres to start digging
+No releases found for these tags
++ {release.artist} +
+ {release.genre && ( + + {release.genre} + + )} + +