From 152f21767590e6a7fe08d05c4d8f9c45a88081f0 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 31 Mar 2026 09:58:28 -0500 Subject: [PATCH] Add Bandcamp discovery via public API (no scraping) - browse new releases by genre tag --- backend/app/api/endpoints/bandcamp.py | 61 ++--- backend/app/main.py | 3 +- backend/app/services/bandcamp.py | 290 +++++------------------- frontend/src/App.tsx | 11 + frontend/src/components/Layout.tsx | 3 +- frontend/src/lib/api.ts | 22 +- frontend/src/pages/BandcampDiscover.tsx | 206 +++++++++++++++++ 7 files changed, 295 insertions(+), 301 deletions(-) create mode 100644 frontend/src/pages/BandcampDiscover.tsx diff --git a/backend/app/api/endpoints/bandcamp.py b/backend/app/api/endpoints/bandcamp.py index 1635031..d369aab 100644 --- a/backend/app/api/endpoints/bandcamp.py +++ b/backend/app/api/endpoints/bandcamp.py @@ -1,58 +1,25 @@ -from fastapi import APIRouter, Depends, HTTPException, Query -from pydantic import BaseModel +from fastapi import APIRouter, Depends, Query from app.core.security import get_current_user from app.models.user import User -from app.services.bandcamp import search_bandcamp, get_embed_data +from app.services.bandcamp import discover_by_tag, get_trending_tags router = APIRouter(prefix="/bandcamp", tags=["bandcamp"]) -class BandcampResult(BaseModel): - title: str - artist: str - art_url: str | None = None - bandcamp_url: str - item_type: str - - -class BandcampEmbedResponse(BaseModel): - embed_url: str - title: str - artist: str - art_url: str | None = None - - -@router.get("/search", response_model=list[BandcampResult]) -async def bandcamp_search( - q: str = Query(..., min_length=1), - type: str = Query("t", pattern="^[tab]$"), +@router.get("/discover") +async def bandcamp_discover( + tags: str = Query(..., description="Comma-separated tags, e.g. 'indie-rock,shoegaze'"), + sort: str = Query("new", description="Sort: new, rec, or pop"), + page: int = Query(1), user: User = Depends(get_current_user), ): - """Search Bandcamp for tracks, albums, or artists.""" - results = await search_bandcamp(q.strip(), item_type=type) - return [BandcampResult(**r) for r in results] + tag_list = [t.strip() for t in tags.split(",") if t.strip()] + if not tag_list: + return [] + return await discover_by_tag(tag_list, sort=sort, page=page) -@router.get("/embed", response_model=BandcampEmbedResponse) -async def bandcamp_embed( - url: str = Query(..., min_length=1), - user: User = Depends(get_current_user), -): - """Get embed data for a Bandcamp URL.""" - if "bandcamp.com" not in url: - raise HTTPException(status_code=400, detail="Not a valid Bandcamp URL") - - data = await get_embed_data(url.strip()) - if not data: - raise HTTPException( - status_code=404, - detail="Could not extract embed data from this Bandcamp page", - ) - - return BandcampEmbedResponse( - embed_url=data["embed_url"], - title=data["title"], - artist=data["artist"], - art_url=data.get("art_url"), - ) +@router.get("/tags") +async def bandcamp_tags(user: User = Depends(get_current_user)): + return await get_trending_tags() diff --git a/backend/app/main.py b/backend/app/main.py index 6f4611e..5f1910e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -2,7 +2,7 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.core.config import settings -from app.api.endpoints import auth, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music +from app.api.endpoints import auth, bandcamp, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music app = FastAPI(title="Vynl API", version="1.0.0", redirect_slashes=False) @@ -22,6 +22,7 @@ app.include_router(recommendations.router, prefix="/api") app.include_router(youtube_music.router, prefix="/api") app.include_router(manual_import.router, prefix="/api") app.include_router(lastfm.router, prefix="/api") +app.include_router(bandcamp.router, prefix="/api") app.include_router(profile.router, prefix="/api") diff --git a/backend/app/services/bandcamp.py b/backend/app/services/bandcamp.py index efc4183..1ecefb9 100644 --- a/backend/app/services/bandcamp.py +++ b/backend/app/services/bandcamp.py @@ -1,255 +1,73 @@ -import re -from difflib import SequenceMatcher +"""Bandcamp discovery using their public APIs (no scraping).""" import httpx - -AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete" -SEARCH_URL = "https://bandcamp.com/search" - HEADERS = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", } - -def _normalize(s: str) -> str: - """Normalize string for comparison.""" - return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip() +DIG_DEEPER_URL = "https://bandcamp.com/api/hub/2/dig_deeper" -def _similarity(a: str, b: str) -> float: - """Return similarity ratio between two strings.""" - return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio() +async def discover_by_tag( + tags: list[str], + sort: str = "new", + page: int = 1, +) -> list[dict]: + """Discover new music on Bandcamp by tag using their public API. + Args: + tags: List of genre/tag strings (e.g. ["indie-rock", "shoegaze"]) + sort: "new", "rec", or "pop" (new releases, recommended, popular) + page: Page number for pagination -async def search_bandcamp_verified(artist: str, title: str) -> dict | None: - """Search Bandcamp and only return a result if the artist actually matches. - - Returns the best matching result or None if no good match found. - First tries artist+song, then falls back to artist-only search. + Returns list of releases with: title, artist, art_url, bandcamp_url, genre, item_type """ - # Try track search first: "artist title" - results = await search_bandcamp(f"{artist} {title}", item_type="t") - for r in results: - artist_sim = _similarity(r.get("artist", ""), artist) - title_sim = _similarity(r.get("title", ""), title) - # Require artist to be a strong match (>0.75) AND title reasonable (>0.5) - if artist_sim >= 0.75 and title_sim >= 0.5: - return r + async with httpx.AsyncClient(timeout=15, headers=HEADERS) as client: + resp = await client.post( + DIG_DEEPER_URL, + json={ + "filters": { + "format": "all", + "location": 0, + "sort": sort, + "tags": tags, + }, + "page": page, + }, + ) - # Try artist/band search as fallback — return their artist page URL - results = await search_bandcamp(artist, item_type="b") - for r in results: - # For band results, title IS the band name - name = r.get("title", "") or r.get("artist", "") - if _similarity(name, artist) >= 0.7: - return r - - return None - - -async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]: - """Search Bandcamp for tracks, albums, or artists. - - item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists. - """ - # Try autocomplete API first - results = await _search_autocomplete(query, item_type) - if results: - return results - - # Fall back to HTML scraping - return await _search_html(query, item_type) - - -async def _search_autocomplete(query: str, item_type: str) -> list[dict]: - """Try the undocumented Bandcamp autocomplete API.""" - try: - async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client: - resp = await client.get(AUTOCOMPLETE_URL, params={"q": query}) - - if resp.status_code != 200: - return [] - - data = resp.json() - results = [] - - # The autocomplete API returns results grouped by type - auto_results = data.get("results", []) - for item in auto_results: - result_type = item.get("type", "") - - # Map autocomplete types to our item_type filter - if item_type == "t" and result_type != "t": - continue - if item_type == "a" and result_type != "a": - continue - if item_type == "b" and result_type != "b": - continue - - results.append({ - "title": item.get("name", ""), - "artist": item.get("band_name", ""), - "art_url": item.get("img", item.get("art_id", None)), - "bandcamp_url": item.get("url", ""), - "item_type": result_type, - }) - - return results[:20] - except Exception: + if resp.status_code != 200: return [] + data = resp.json() + results = [] -async def _search_html(query: str, item_type: str) -> list[dict]: - """Fall back to scraping Bandcamp search results HTML.""" - params = {"q": query, "item_type": item_type} - try: - async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client: - resp = await client.get(SEARCH_URL, params=params) + for item in data.get("items", []): + art_id = item.get("art_id") + art_url = f"https://f4.bcbits.com/img/a{art_id}_16.jpg" if art_id else None - if resp.status_code != 200: - return [] + tralbum_type = item.get("tralbum_type", "a") + type_path = "album" if tralbum_type == "a" else "track" + item_url = item.get("tralbum_url", "") - html = resp.text - results = [] - - # Split by search result items - items = re.split(r'\s*]*>\s*([^<]+)', - item_html, - ) - if not heading_match: - continue - - url = heading_match.group(1).strip() - title = heading_match.group(2).strip() - - # Extract artist/subhead info - subhead_match = re.search( - r'class="subhead">\s*([^<]+)', item_html - ) - artist = "" - if subhead_match: - subhead = subhead_match.group(1).strip() - # Subhead format varies: "by Artist" or "from Album by Artist" - by_match = re.search(r'by\s+(.+)', subhead) - if by_match: - artist = by_match.group(1).strip() - else: - artist = subhead - - # Extract album art URL - art_match = re.search( - r'class="art">\s*= 20: - break - - return results - except Exception: - return [] - - -async def get_embed_data(bandcamp_url: str) -> dict | None: - """Get embed info for a Bandcamp URL. - - Fetches the page HTML, extracts the track/album ID, and returns - the embed iframe URL along with metadata. - """ - try: - async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client: - resp = await client.get(bandcamp_url) - - if resp.status_code != 200: - return None - - html = resp.text - - # Determine if this is a track or album URL - is_track = "/track/" in bandcamp_url - - # Try to extract the ID from meta tags or data attributes - # Look for: - # or data-tralbum-id="12345" - item_id = None - - tralbum_match = re.search(r'data-tralbum-id="(\d+)"', html) - if tralbum_match: - item_id = tralbum_match.group(1) - - if not item_id: - # Try og:video meta tag which contains embed URL with ID - og_match = re.search( - r' list[str]: + """Return common Bandcamp genre tags for discovery.""" + return [ + "indie-rock", "electronic", "hip-hop-rap", "ambient", "punk", + "experimental", "folk", "jazz", "metal", "pop", "r-b-soul", + "shoegaze", "post-punk", "synthwave", "lo-fi", "dream-pop", + "indie-pop", "psychedelic", "garage-rock", "emo", + ] diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 9b20440..e17070c 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -11,6 +11,7 @@ import Discover from './pages/Discover' import Recommendations from './pages/Recommendations' import Billing from './pages/Billing' import TasteProfilePage from './pages/TasteProfilePage' +import BandcampDiscover from './pages/BandcampDiscover' function RootRedirect() { const { user, loading } = useAuth() @@ -82,6 +83,16 @@ function AppRoutes() { } /> + + + + + + } + /> api.get('/billing/status').then((r) => r.data) // Bandcamp -export interface BandcampResult { +export interface BandcampRelease { title: string artist: string art_url: string | null bandcamp_url: string + genre: string item_type: string } -export interface BandcampEmbed { - embed_url: string - title: string - artist: string - art_url: string | null -} +export const discoverBandcamp = (tags: string, sort: string = 'new', page: number = 1) => + api.get('/bandcamp/discover', { params: { tags, sort, page } }).then((r) => r.data) -export async function searchBandcamp(query: string, type: string = 't'): Promise { - const { data } = await api.get('/bandcamp/search', { params: { q: query, type } }) - return data -} - -export async function getBandcampEmbed(url: string): Promise { - const { data } = await api.get('/bandcamp/embed', { params: { url } }) - return data -} +export const getBandcampTags = () => + api.get('/bandcamp/tags').then((r) => r.data) // Playlist Fix export interface OutlierTrack { diff --git a/frontend/src/pages/BandcampDiscover.tsx b/frontend/src/pages/BandcampDiscover.tsx new file mode 100644 index 0000000..cd50b6e --- /dev/null +++ b/frontend/src/pages/BandcampDiscover.tsx @@ -0,0 +1,206 @@ +import { useState, useEffect } from 'react' +import { Disc3, Music, ExternalLink, Loader2 } from 'lucide-react' +import { discoverBandcamp, getBandcampTags, BandcampRelease } from '../lib/api' + +const SORT_OPTIONS = [ + { value: 'new', label: 'New Releases' }, + { value: 'rec', label: 'Recommended' }, + { value: 'pop', label: 'Popular' }, +] + +export default function BandcampDiscover() { + const [tags, setTags] = useState([]) + const [selectedTags, setSelectedTags] = useState([]) + const [sort, setSort] = useState('new') + const [releases, setReleases] = useState([]) + const [page, setPage] = useState(1) + const [loading, setLoading] = useState(false) + const [loadingMore, setLoadingMore] = useState(false) + const [tagsLoading, setTagsLoading] = useState(true) + + useEffect(() => { + getBandcampTags() + .then(setTags) + .catch(() => setTags(['indie-rock', 'electronic', 'shoegaze', 'ambient', 'punk', 'experimental', 'hip-hop', 'jazz', 'folk', 'metal', 'post-punk', 'synthwave'])) + .finally(() => setTagsLoading(false)) + }, []) + + const fetchReleases = async (newPage: number, append: boolean = false) => { + if (selectedTags.length === 0) return + append ? setLoadingMore(true) : setLoading(true) + try { + const data = await discoverBandcamp(selectedTags.join(','), sort, newPage) + setReleases(append ? (prev) => [...prev, ...data] : data) + setPage(newPage) + } catch { + // silently handle + } finally { + setLoading(false) + setLoadingMore(false) + } + } + + useEffect(() => { + if (selectedTags.length > 0) { + fetchReleases(1) + } else { + setReleases([]) + setPage(1) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [selectedTags, sort]) + + const toggleTag = (tag: string) => { + setSelectedTags((prev) => + prev.includes(tag) ? prev.filter((t) => t !== tag) : [...prev, tag] + ) + } + + return ( +
+ {/* Header */} +
+
+ +

Bandcamp Discovery

+
+

Browse new independent releases

+
+ + {/* Tag Selector */} +
+

Genres

+ {tagsLoading ? ( +
+ + Loading tags... +
+ ) : ( +
+ {tags.map((tag) => { + const selected = selectedTags.includes(tag) + return ( + + ) + })} +
+ )} +
+ + {/* Sort Toggle */} +
+
+ {SORT_OPTIONS.map((opt) => ( + + ))} +
+
+ + {/* Results */} + {selectedTags.length === 0 ? ( +
+ +

Select some genres to start digging

+
+ ) : loading ? ( +
+ +
+ ) : releases.length === 0 ? ( +
+ +

No releases found for these tags

+
+ ) : ( + <> +
+ {releases.map((release, i) => ( +
+ {/* Album Art */} +
+ {release.art_url ? ( + {`${release.title} + ) : ( +
+ +
+ )} +
+ + {/* Info */} +
+

+ {release.title} +

+

+ {release.artist} +

+ {release.genre && ( + + {release.genre} + + )} + + + Listen on Bandcamp + +
+
+ ))} +
+ + {/* Load More */} +
+ +
+ + )} +
+ ) +}