Add Bandcamp discovery via public API (no scraping) - browse new releases by genre tag

This commit is contained in:
root
2026-03-31 09:58:28 -05:00
parent be30a47bbb
commit 152f217675
7 changed files with 295 additions and 301 deletions

View File

@@ -1,58 +1,25 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from fastapi import APIRouter, Depends, Query
from app.core.security import get_current_user
from app.models.user import User
from app.services.bandcamp import search_bandcamp, get_embed_data
from app.services.bandcamp import discover_by_tag, get_trending_tags
router = APIRouter(prefix="/bandcamp", tags=["bandcamp"])
class BandcampResult(BaseModel):
    """One Bandcamp search hit; the element type of the `/search` response list."""

    # Display name of the hit.
    title: str
    # Artist/band name associated with the hit.
    artist: str
    # Cover art URL; optional and defaults to None when absent.
    art_url: str | None = None
    # Link to the item on Bandcamp.
    bandcamp_url: str
    # Item type code. The `/search` endpoint's `type` query accepts "t", "a" or "b";
    # presumably the same codes appear here — confirm against the search service.
    item_type: str
class BandcampEmbedResponse(BaseModel):
    """Response body of the `/embed` endpoint: player URL plus display metadata."""

    # URL of the embeddable Bandcamp player for the requested item.
    embed_url: str
    # Title of the item as reported by the embed lookup.
    title: str
    # Artist name as reported by the embed lookup.
    artist: str
    # Cover art URL; optional and defaults to None when the lookup found none.
    art_url: str | None = None
@router.get("/search", response_model=list[BandcampResult])
async def bandcamp_search(
q: str = Query(..., min_length=1),
type: str = Query("t", pattern="^[tab]$"),
@router.get("/discover")
async def bandcamp_discover(
tags: str = Query(..., description="Comma-separated tags, e.g. 'indie-rock,shoegaze'"),
sort: str = Query("new", description="Sort: new, rec, or pop"),
page: int = Query(1),
user: User = Depends(get_current_user),
):
"""Search Bandcamp for tracks, albums, or artists."""
results = await search_bandcamp(q.strip(), item_type=type)
return [BandcampResult(**r) for r in results]
tag_list = [t.strip() for t in tags.split(",") if t.strip()]
if not tag_list:
return []
return await discover_by_tag(tag_list, sort=sort, page=page)
@router.get("/embed", response_model=BandcampEmbedResponse)
async def bandcamp_embed(
    url: str = Query(..., min_length=1),
    user: User = Depends(get_current_user),
):
    """Get embed data for a Bandcamp URL.

    Args:
        url: Absolute http(s) URL whose host is ``bandcamp.com`` or one of its
            subdomains. Surrounding whitespace is ignored.
        user: Authenticated user; unused in the body, declared so the route
            depends on ``get_current_user``.

    Returns:
        A ``BandcampEmbedResponse`` built from the embed lookup result.

    Raises:
        HTTPException: 400 when ``url`` is not an http(s) Bandcamp URL,
            404 when no embed data could be extracted from the page.
    """
    # Local import: urllib.parse is stdlib and only needed for this validation.
    from urllib.parse import urlsplit

    cleaned_url = url.strip()
    try:
        parts = urlsplit(cleaned_url)
        scheme = parts.scheme.lower()
        host = (parts.hostname or "").lower()
    except ValueError:
        # urlsplit rejects some malformed inputs (e.g. broken IPv6 literals).
        scheme, host = "", ""

    # The URL is fetched server-side by get_embed_data, so validate the real
    # host instead of a substring match: "http://internal/?bandcamp.com"
    # contains the text "bandcamp.com" but must not be requested.
    is_bandcamp_host = host == "bandcamp.com" or host.endswith(".bandcamp.com")
    if scheme not in ("http", "https") or not is_bandcamp_host:
        raise HTTPException(status_code=400, detail="Not a valid Bandcamp URL")

    data = await get_embed_data(cleaned_url)
    if not data:
        raise HTTPException(
            status_code=404,
            detail="Could not extract embed data from this Bandcamp page",
        )

    return BandcampEmbedResponse(
        embed_url=data["embed_url"],
        title=data["title"],
        artist=data["artist"],
        art_url=data.get("art_url"),
    )
@router.get("/tags")
async def bandcamp_tags(user: User = Depends(get_current_user)):
    """List the genre tags clients can offer for Bandcamp discovery.

    ``user`` is not read in the body; it is declared so the route depends on
    ``get_current_user``.
    """
    available_tags = await get_trending_tags()
    return available_tags

View File

@@ -2,7 +2,7 @@ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.core.config import settings
from app.api.endpoints import auth, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music
from app.api.endpoints import auth, bandcamp, billing, lastfm, manual_import, playlist_fix, playlists, profile, recommendations, youtube_music
app = FastAPI(title="Vynl API", version="1.0.0", redirect_slashes=False)
@@ -22,6 +22,7 @@ app.include_router(recommendations.router, prefix="/api")
app.include_router(youtube_music.router, prefix="/api")
app.include_router(manual_import.router, prefix="/api")
app.include_router(lastfm.router, prefix="/api")
app.include_router(bandcamp.router, prefix="/api")
app.include_router(profile.router, prefix="/api")

View File

@@ -1,72 +1,41 @@
import re
from difflib import SequenceMatcher
"""Bandcamp discovery using their public APIs (no scraping)."""
import httpx
AUTOCOMPLETE_URL = "https://bandcamp.com/api/fuzzysearch/2/autocomplete"
SEARCH_URL = "https://bandcamp.com/search"
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
}
def _normalize(s: str) -> str:
"""Normalize string for comparison."""
return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip()
DIG_DEEPER_URL = "https://bandcamp.com/api/hub/2/dig_deeper"
def _similarity(a: str, b: str) -> float:
    """Score how alike two strings are once both have been normalized.

    Returns the ``SequenceMatcher.ratio()`` of the two normalized strings.
    """
    left = _normalize(a)
    right = _normalize(b)
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
async def discover_by_tag(
tags: list[str],
sort: str = "new",
page: int = 1,
) -> list[dict]:
"""Discover new music on Bandcamp by tag using their public API.
Args:
tags: List of genre/tag strings (e.g. ["indie-rock", "shoegaze"])
sort: "new", "rec", or "pop" (new releases, recommended, popular)
page: Page number for pagination
async def search_bandcamp_verified(artist: str, title: str) -> dict | None:
"""Search Bandcamp and only return a result if the artist actually matches.
Returns the best matching result or None if no good match found.
First tries artist+song, then falls back to artist-only search.
Returns list of releases with: title, artist, art_url, bandcamp_url, genre, item_type
"""
# Try track search first: "artist title"
results = await search_bandcamp(f"{artist} {title}", item_type="t")
for r in results:
artist_sim = _similarity(r.get("artist", ""), artist)
title_sim = _similarity(r.get("title", ""), title)
# Require artist to be a strong match (>0.75) AND title reasonable (>0.5)
if artist_sim >= 0.75 and title_sim >= 0.5:
return r
# Try artist/band search as fallback — return their artist page URL
results = await search_bandcamp(artist, item_type="b")
for r in results:
# For band results, title IS the band name
name = r.get("title", "") or r.get("artist", "")
if _similarity(name, artist) >= 0.7:
return r
return None
async def search_bandcamp(query: str, item_type: str = "t") -> list[dict]:
"""Search Bandcamp for tracks, albums, or artists.
item_type: 't' for tracks, 'a' for albums, 'b' for bands/artists.
"""
# Try autocomplete API first
results = await _search_autocomplete(query, item_type)
if results:
return results
# Fall back to HTML scraping
return await _search_html(query, item_type)
async def _search_autocomplete(query: str, item_type: str) -> list[dict]:
"""Try the undocumented Bandcamp autocomplete API."""
try:
async with httpx.AsyncClient(timeout=10, headers=HEADERS) as client:
resp = await client.get(AUTOCOMPLETE_URL, params={"q": query})
async with httpx.AsyncClient(timeout=15, headers=HEADERS) as client:
resp = await client.post(
DIG_DEEPER_URL,
json={
"filters": {
"format": "all",
"location": 0,
"sort": sort,
"tags": tags,
},
"page": page,
},
)
if resp.status_code != 200:
return []
@@ -74,182 +43,31 @@ async def _search_autocomplete(query: str, item_type: str) -> list[dict]:
data = resp.json()
results = []
# The autocomplete API returns results grouped by type
auto_results = data.get("results", [])
for item in auto_results:
result_type = item.get("type", "")
for item in data.get("items", []):
art_id = item.get("art_id")
art_url = f"https://f4.bcbits.com/img/a{art_id}_16.jpg" if art_id else None
# Map autocomplete types to our item_type filter
if item_type == "t" and result_type != "t":
continue
if item_type == "a" and result_type != "a":
continue
if item_type == "b" and result_type != "b":
continue
tralbum_type = item.get("tralbum_type", "a")
type_path = "album" if tralbum_type == "a" else "track"
item_url = item.get("tralbum_url", "")
results.append({
"title": item.get("name", ""),
"artist": item.get("band_name", ""),
"art_url": item.get("img", item.get("art_id", None)),
"bandcamp_url": item.get("url", ""),
"item_type": result_type,
})
return results[:20]
except Exception:
return []
async def _search_html(query: str, item_type: str) -> list[dict]:
"""Fall back to scraping Bandcamp search results HTML."""
params = {"q": query, "item_type": item_type}
try:
async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
resp = await client.get(SEARCH_URL, params=params)
if resp.status_code != 200:
return []
html = resp.text
results = []
# Split by search result items
items = re.split(r'<li\s+class="searchresult\s', html)
for item_html in items[1:]: # skip first split (before first result)
# Extract title and URL from heading link
heading_match = re.search(
r'class="heading">\s*<a\s+href="([^"]+)"[^>]*>\s*([^<]+)',
item_html,
)
if not heading_match:
continue
url = heading_match.group(1).strip()
title = heading_match.group(2).strip()
# Extract artist/subhead info
subhead_match = re.search(
r'class="subhead">\s*([^<]+)', item_html
)
artist = ""
if subhead_match:
subhead = subhead_match.group(1).strip()
# Subhead format varies: "by Artist" or "from Album by Artist"
by_match = re.search(r'by\s+(.+)', subhead)
if by_match:
artist = by_match.group(1).strip()
else:
artist = subhead
# Extract album art URL
art_match = re.search(
r'class="art">\s*<img\s+src="([^"]+)"', item_html
)
art_url = art_match.group(1).strip() if art_match else None
results.append({
"title": title,
"artist": artist,
"title": item.get("title", ""),
"artist": item.get("artist", ""),
"art_url": art_url,
"bandcamp_url": url,
"item_type": item_type,
"bandcamp_url": item_url,
"genre": ", ".join(tags),
"item_type": type_path,
})
if len(results) >= 20:
break
return results
except Exception:
return []
def _extract_tralbum_id(page_html: str) -> str | None:
    """Return the first track/album id found in a Bandcamp page, or None."""
    # Tried in order: data attribute, og:video embed URL, inline player link.
    patterns = (
        r'data-tralbum-id="(\d+)"',
        r'<meta\s+property="og:video"\s+content="[^"]*(?:album|track)=(\d+)',
        r'EmbeddedPlayer/(?:album|track)=(\d+)',
    )
    for pattern in patterns:
        match = re.search(pattern, page_html)
        if match:
            return match.group(1)
    return None


def _og_meta_content(page_html: str, prop: str) -> str | None:
    """Return the decoded ``content`` of ``<meta property="prop">``, or None."""
    # Local import: html is stdlib and only needed to decode attribute text.
    from html import unescape

    match = re.search(
        rf'<meta\s+property="{re.escape(prop)}"\s+content="([^"]+)"',
        page_html,
    )
    if not match:
        return None
    # Attribute values are HTML-escaped in the page source ("&amp;", "&#39;");
    # decode them so callers receive the real text / a usable URL.
    return unescape(match.group(1)).strip()


async def get_embed_data(bandcamp_url: str) -> dict | None:
    """Get embed info for a Bandcamp URL.

    Fetches the page HTML, extracts the track/album ID, and returns
    the embed iframe URL along with metadata.

    Args:
        bandcamp_url: URL of the Bandcamp page to fetch. A URL containing
            ``/track/`` is embedded as a track, anything else as an album.

    Returns:
        A dict with ``embed_url``, ``title``, ``artist``, ``art_url``,
        ``item_id`` and ``item_type``; or None when the request fails, the
        response status is not 200, or no item id is found in the page.
    """
    # Local import: logging is stdlib and only used on the failure path.
    import logging

    try:
        async with httpx.AsyncClient(timeout=15, headers=HEADERS, follow_redirects=True) as client:
            resp = await client.get(bandcamp_url)
        page_html = resp.text
    except Exception:
        # Best-effort lookup: callers treat None as "no embed available", but
        # record the failure instead of swallowing it silently.
        logging.getLogger(__name__).warning(
            "Bandcamp embed fetch failed for %s", bandcamp_url, exc_info=True
        )
        return None

    if resp.status_code != 200:
        return None

    item_id = _extract_tralbum_id(page_html)
    if not item_id:
        return None

    # Determine if this is a track or album URL
    id_type = "track" if "/track/" in bandcamp_url else "album"
    embed_url = (
        f"https://bandcamp.com/EmbeddedPlayer/"
        f"{id_type}={item_id}/size=large/"
        f"bgcol=1C1917/linkcol=7C3AED/"
        f"tracklist=false/transparent=true/"
    )

    return {
        "embed_url": embed_url,
        # Title and artist fall back to "" when the tag is missing; art stays None.
        "title": _og_meta_content(page_html, "og:title") or "",
        "artist": _og_meta_content(page_html, "og:site_name") or "",
        "art_url": _og_meta_content(page_html, "og:image"),
        "item_id": item_id,
        "item_type": id_type,
    }
async def get_trending_tags() -> list[str]:
    """Provide the fixed set of Bandcamp genre tags offered for discovery.

    The tags are hard-coded here; nothing is requested from Bandcamp.
    """
    curated_tags = (
        "indie-rock",
        "electronic",
        "hip-hop-rap",
        "ambient",
        "punk",
        "experimental",
        "folk",
        "jazz",
        "metal",
        "pop",
        "r-b-soul",
        "shoegaze",
        "post-punk",
        "synthwave",
        "lo-fi",
        "dream-pop",
        "indie-pop",
        "psychedelic",
        "garage-rock",
        "emo",
    )
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(curated_tags)

View File

@@ -11,6 +11,7 @@ import Discover from './pages/Discover'
import Recommendations from './pages/Recommendations'
import Billing from './pages/Billing'
import TasteProfilePage from './pages/TasteProfilePage'
import BandcampDiscover from './pages/BandcampDiscover'
function RootRedirect() {
const { user, loading } = useAuth()
@@ -82,6 +83,16 @@ function AppRoutes() {
</ProtectedRoute>
}
/>
<Route
path="/bandcamp"
element={
<ProtectedRoute>
<Layout>
<BandcampDiscover />
</Layout>
</ProtectedRoute>
}
/>
<Route
path="/saved"
element={

View File

@@ -1,6 +1,6 @@
import { useState } from 'react'
import { Link, useLocation, useNavigate } from 'react-router-dom'
import { Disc3, LayoutDashboard, Fingerprint, ListMusic, Compass, Heart, Crown, Menu, X, LogOut, User } from 'lucide-react'
import { Disc3, LayoutDashboard, Fingerprint, ListMusic, Compass, Store, Heart, Crown, Menu, X, LogOut, User } from 'lucide-react'
import { useAuth } from '../lib/auth'
const navItems = [
@@ -8,6 +8,7 @@ const navItems = [
{ path: '/profile', label: 'My Taste', icon: Fingerprint },
{ path: '/playlists', label: 'Playlists', icon: ListMusic },
{ path: '/discover', label: 'Discover', icon: Compass },
{ path: '/bandcamp', label: 'Bandcamp', icon: Store },
{ path: '/saved', label: 'Saved', icon: Heart },
{ path: '/billing', label: 'Pro', icon: Crown },
]

View File

@@ -256,30 +256,20 @@ export const getBillingStatus = () =>
api.get<BillingStatusResponse>('/billing/status').then((r) => r.data)
// Bandcamp
export interface BandcampResult {
export interface BandcampRelease {
title: string
artist: string
art_url: string | null
bandcamp_url: string
genre: string
item_type: string
}
/** Embed payload returned by `GET /bandcamp/embed` (see `getBandcampEmbed`). */
export interface BandcampEmbed {
  // URL of the embeddable player.
  embed_url: string
  title: string
  artist: string
  // Cover art URL, or null when none is available.
  art_url: string | null
}
/**
 * Fetch one page of Bandcamp releases from `GET /bandcamp/discover`.
 *
 * @param tags Comma-separated tag string sent as the `tags` query parameter.
 * @param sort Sort key sent as `sort` (defaults to 'new').
 * @param page Page number sent as `page` (defaults to 1).
 * @returns The response body, typed as a list of releases.
 */
export const discoverBandcamp = async (tags: string, sort: string = 'new', page: number = 1) => {
  const response = await api.get<BandcampRelease[]>('/bandcamp/discover', {
    params: { tags, sort, page },
  })
  return response.data
}
/**
 * Query `GET /bandcamp/search`.
 *
 * @param query Search text, sent as the `q` query parameter.
 * @param type Item type code sent as `type` (defaults to 't').
 * @returns The response body.
 */
export async function searchBandcamp(query: string, type: string = 't'): Promise<BandcampResult[]> {
  const params = { q: query, type }
  const response = await api.get('/bandcamp/search', { params })
  return response.data
}
/**
 * Look up embed data for a Bandcamp page via `GET /bandcamp/embed`.
 *
 * @param url Bandcamp page URL, sent as the `url` query parameter.
 * @returns The response body.
 */
export async function getBandcampEmbed(url: string): Promise<BandcampEmbed> {
  const params = { url }
  const response = await api.get('/bandcamp/embed', { params })
  return response.data
}
/** Fetch the list of discovery tags from `GET /bandcamp/tags`. */
export const getBandcampTags = async () => {
  const response = await api.get<string[]>('/bandcamp/tags')
  return response.data
}
// Playlist Fix
export interface OutlierTrack {

View File

@@ -0,0 +1,206 @@
import { useState, useEffect, useRef } from 'react'
import { Disc3, Music, ExternalLink, Loader2 } from 'lucide-react'
import { discoverBandcamp, getBandcampTags, BandcampRelease } from '../lib/api'
// Sort modes shown as toggle buttons. `value` is stored in the `sort` state and
// passed to discoverBandcamp as the sort argument; `label` is the button text.
const SORT_OPTIONS = [
  { value: 'new', label: 'New Releases' },
  { value: 'rec', label: 'Recommended' },
  { value: 'pop', label: 'Popular' },
]
/**
 * Bandcamp discovery page.
 *
 * Lets the user pick genre tags and a sort mode, then shows the matching
 * releases in a grid with "Load More" pagination.
 *
 * Every request gets an increasing id; a response is applied only when its id
 * is still the latest, so a slow response for an earlier tag/sort selection
 * can never overwrite newer results. Request failures are surfaced to the
 * user, and "Load More" is hidden once a page comes back empty.
 */
export default function BandcampDiscover() {
  const [tags, setTags] = useState<string[]>([])
  const [selectedTags, setSelectedTags] = useState<string[]>([])
  const [sort, setSort] = useState('new')
  const [releases, setReleases] = useState<BandcampRelease[]>([])
  const [page, setPage] = useState(1)
  const [loading, setLoading] = useState(false)
  const [loadingMore, setLoadingMore] = useState(false)
  const [tagsLoading, setTagsLoading] = useState(true)
  // False once a page comes back empty, so "Load More" can be hidden.
  const [hasMore, setHasMore] = useState(true)
  // User-facing message for the most recent failed request, if any.
  const [error, setError] = useState<string | null>(null)
  // Id of the most recent request; responses carrying an older id are ignored.
  const latestRequest = useRef(0)

  // Load the tag list once on mount; fall back to a built-in list on failure.
  useEffect(() => {
    getBandcampTags()
      .then(setTags)
      .catch(() => setTags(['indie-rock', 'electronic', 'shoegaze', 'ambient', 'punk', 'experimental', 'hip-hop', 'jazz', 'folk', 'metal', 'post-punk', 'synthwave']))
      .finally(() => setTagsLoading(false))
  }, [])

  // Fetch one page for the current selection. With `append` the page is added
  // to the existing results (Load More); otherwise it replaces them.
  const fetchReleases = async (newPage: number, append: boolean = false) => {
    if (selectedTags.length === 0) return

    const requestId = ++latestRequest.current
    setError(null)
    if (append) {
      setLoadingMore(true)
    } else {
      setLoading(true)
    }

    try {
      const data = await discoverBandcamp(selectedTags.join(','), sort, newPage)
      // A newer request started while this one was in flight: drop the result.
      if (requestId !== latestRequest.current) return
      setReleases((prev) => (append ? [...prev, ...data] : data))
      setPage(newPage)
      setHasMore(data.length > 0)
    } catch {
      if (requestId !== latestRequest.current) return
      setError('Could not load releases from Bandcamp. Please try again.')
      // A failed fresh search must not leave the previous selection's results visible.
      if (!append) setReleases([])
    } finally {
      // Only the latest request owns the spinners; a stale one must not clear them.
      if (requestId === latestRequest.current) {
        setLoading(false)
        setLoadingMore(false)
      }
    }
  }

  // Restart from page 1 whenever the selection or sort mode changes.
  useEffect(() => {
    if (selectedTags.length > 0) {
      fetchReleases(1)
    } else {
      // Invalidate any in-flight request and reset everything it would have updated.
      latestRequest.current += 1
      setReleases([])
      setPage(1)
      setHasMore(true)
      setError(null)
      setLoading(false)
      setLoadingMore(false)
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [selectedTags, sort])

  const toggleTag = (tag: string) => {
    setSelectedTags((prev) =>
      prev.includes(tag) ? prev.filter((t) => t !== tag) : [...prev, tag]
    )
  }

  return (
    <div className="min-h-[80vh]">
      {/* Header */}
      <div className="mb-8">
        <div className="flex items-center gap-3 mb-2">
          <Disc3 className="w-8 h-8 text-purple" />
          <h1 className="text-3xl font-bold text-charcoal">Bandcamp Discovery</h1>
        </div>
        <p className="text-charcoal-muted">Browse new independent releases</p>
      </div>

      {/* Tag Selector */}
      <div className="mb-6">
        <h2 className="text-sm font-semibold text-charcoal-muted uppercase tracking-wider mb-3">Genres</h2>
        {tagsLoading ? (
          <div className="flex items-center gap-2 text-charcoal-muted">
            <Loader2 className="w-4 h-4 animate-spin" />
            <span className="text-sm">Loading tags...</span>
          </div>
        ) : (
          <div className="flex flex-wrap gap-2">
            {tags.map((tag) => {
              const selected = selectedTags.includes(tag)
              return (
                <button
                  key={tag}
                  onClick={() => toggleTag(tag)}
                  className={`px-4 py-2 rounded-full text-sm font-medium transition-all cursor-pointer border ${
                    selected
                      ? 'bg-purple text-white border-purple shadow-md'
                      : 'bg-white text-charcoal border-purple-200 hover:border-purple hover:text-purple'
                  }`}
                >
                  {tag}
                </button>
              )
            })}
          </div>
        )}
      </div>

      {/* Sort Toggle */}
      <div className="mb-8">
        <div className="flex gap-2">
          {SORT_OPTIONS.map((opt) => (
            <button
              key={opt.value}
              onClick={() => setSort(opt.value)}
              className={`px-4 py-2 rounded-lg text-sm font-medium transition-all cursor-pointer border ${
                sort === opt.value
                  ? 'bg-charcoal text-white border-charcoal'
                  : 'bg-white text-charcoal-muted border-purple-100 hover:border-charcoal hover:text-charcoal'
              }`}
            >
              {opt.label}
            </button>
          ))}
        </div>
      </div>

      {/* Results */}
      {selectedTags.length === 0 ? (
        <div className="text-center py-20">
          <Disc3 className="w-16 h-16 text-purple-200 mx-auto mb-4" />
          <p className="text-charcoal-muted text-lg">Select some genres to start digging</p>
        </div>
      ) : loading ? (
        <div className="flex items-center justify-center py-20">
          <Loader2 className="w-10 h-10 text-purple animate-spin" />
        </div>
      ) : error && releases.length === 0 ? (
        <div className="text-center py-20" role="alert">
          <Music className="w-16 h-16 text-purple-200 mx-auto mb-4" />
          <p className="text-charcoal-muted text-lg">{error}</p>
        </div>
      ) : releases.length === 0 ? (
        <div className="text-center py-20">
          <Music className="w-16 h-16 text-purple-200 mx-auto mb-4" />
          <p className="text-charcoal-muted text-lg">No releases found for these tags</p>
        </div>
      ) : (
        <>
          <div className="grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-5 gap-5">
            {releases.map((release, i) => (
              <div
                key={`${release.bandcamp_url}-${i}`}
                className="bg-white rounded-xl overflow-hidden shadow-md hover:shadow-xl transition-shadow group border border-purple-50"
                style={{ boxShadow: '0 4px 20px rgba(124, 58, 237, 0.08)' }}
              >
                {/* Album Art */}
                <div className="aspect-square relative overflow-hidden">
                  {release.art_url ? (
                    <img
                      src={release.art_url}
                      alt={`${release.title} by ${release.artist}`}
                      className="w-full h-full object-cover group-hover:scale-105 transition-transform duration-300"
                    />
                  ) : (
                    <div className="w-full h-full bg-gradient-to-br from-purple to-purple-800 flex items-center justify-center">
                      <Music className="w-12 h-12 text-white/40" />
                    </div>
                  )}
                </div>

                {/* Info */}
                <div className="p-3">
                  <h3 className="font-bold text-charcoal text-sm leading-tight truncate" title={release.title}>
                    {release.title}
                  </h3>
                  <p className="text-charcoal-muted text-xs mt-1 truncate" title={release.artist}>
                    {release.artist}
                  </p>
                  {release.genre && (
                    <span className="inline-block mt-2 text-[10px] font-medium text-purple bg-purple-50 px-2 py-0.5 rounded-full">
                      {release.genre}
                    </span>
                  )}
                  <a
                    href={release.bandcamp_url}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="mt-3 flex items-center justify-center gap-1.5 w-full py-2 rounded-lg text-xs font-medium bg-charcoal text-white hover:bg-charcoal/80 transition-colors no-underline"
                  >
                    <ExternalLink className="w-3 h-3" />
                    Listen on Bandcamp
                  </a>
                </div>
              </div>
            ))}
          </div>

          {/* Load-more failure: keep the results already shown and report the error */}
          {error && (
            <p className="text-center text-sm text-charcoal-muted mt-6" role="alert">
              {error}
            </p>
          )}

          {/* Load More (hidden once a page comes back empty) */}
          {hasMore && (
            <div className="flex justify-center mt-10 mb-4">
              <button
                onClick={() => fetchReleases(page + 1, true)}
                disabled={loadingMore}
                className="px-8 py-3 rounded-xl bg-purple text-white font-medium hover:bg-purple-700 transition-colors cursor-pointer border-none disabled:opacity-50 flex items-center gap-2"
              >
                {loadingMore ? (
                  <>
                    <Loader2 className="w-4 h-4 animate-spin" />
                    Loading...
                  </>
                ) : (
                  'Load More'
                )}
              </button>
            </div>
          )}
        </>
      )}
    </div>
  )
}