diff --git a/backend/app/services/musicbrainz.py b/backend/app/services/musicbrainz.py
new file mode 100644
index 0000000..90443c4
--- /dev/null
+++ b/backend/app/services/musicbrainz.py
@@ -0,0 +1,129 @@
+"""MusicBrainz API client for verifying songs exist."""
+
+import logging
+import threading
+import time
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+MB_API_URL = "https://musicbrainz.org/ws/2"
+HEADERS = {
+    "User-Agent": "Vynl/1.0 (chris.ryan@deepcutsai.com)",
+    "Accept": "application/json",
+}
+REQUEST_TIMEOUT = 10  # seconds
+
+# MusicBrainz rate limit: 1 request per second (plus a small margin)
+MIN_REQUEST_INTERVAL = 1.1
+
+# Minimum search score for a result to count as a match
+TRACK_MIN_SCORE = 70
+ARTIST_MIN_SCORE = 80
+
+# Lookups run in worker threads, so the limiter state is lock-protected
+_rate_lock = threading.Lock()
+_last_request_time = float("-inf")
+
+
+def _rate_limit():
+    """Sleep as needed so requests are at least MIN_REQUEST_INTERVAL apart."""
+    global _last_request_time
+    with _rate_lock:
+        # Monotonic clock: unaffected by wall-clock adjustments
+        elapsed = time.monotonic() - _last_request_time
+        if elapsed < MIN_REQUEST_INTERVAL:
+            time.sleep(MIN_REQUEST_INTERVAL - elapsed)
+        _last_request_time = time.monotonic()
+
+
+def _quote(value: str) -> str:
+    """Return value as a quoted Lucene phrase, escaping backslash and quote."""
+    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
+    return f'"{escaped}"'
+
+
+def _search(entity: str, query: str, limit: int) -> list:
+    """Run a rate-limited MusicBrainz search for the given entity type.
+
+    Returns the list stored under the pluralised entity key of the JSON
+    response, or an empty list when the response status is not 200.
+    Request and JSON decoding errors propagate to the caller.
+    """
+    _rate_limit()
+    resp = httpx.get(
+        f"{MB_API_URL}/{entity}",
+        params={"query": query, "fmt": "json", "limit": limit},
+        headers=HEADERS,
+        timeout=REQUEST_TIMEOUT,
+    )
+    if resp.status_code != 200:
+        logger.warning(
+            "MusicBrainz %s search returned HTTP %s", entity, resp.status_code
+        )
+        return []
+    return resp.json().get(f"{entity}s", [])
+
+
+def verify_track(artist: str, title: str) -> dict | None:
+    """Verify a track exists on MusicBrainz and return canonical data.
+
+    Returns dict with: artist, title, album, mb_id, score, or None if no
+    recording scores at least TRACK_MIN_SCORE or the lookup fails.
+    """
+    if not artist or not title:
+        return None  # nothing to search for; skip the rate-limited request
+
+    try:
+        query = f"artist:{_quote(artist)} recording:{_quote(title)}"
+        for rec in _search("recording", query, limit=5):
+            score = rec.get("score", 0)
+            if score < TRACK_MIN_SCORE:
+                continue
+
+            artist_credit = rec.get("artist-credit") or []
+            releases = rec.get("releases") or []
+            return {
+                "artist": artist_credit[0].get("name", "") if artist_credit else "",
+                "title": rec.get("title", ""),
+                # Album comes from the first listed release, if any
+                "album": releases[0].get("title") if releases else None,
+                "mb_id": rec.get("id"),
+                "score": score,
+            }
+    except Exception:
+        # Best-effort: callers treat None as "not verified", so never raise
+        logger.warning(
+            "MusicBrainz track lookup failed for %r - %r",
+            artist,
+            title,
+            exc_info=True,
+        )
+    return None
+
+
+def search_artist(artist: str) -> dict | None:
+    """Verify an artist exists on MusicBrainz.
+
+    Returns dict with: name, mb_id, score, or None if no artist scores at
+    least ARTIST_MIN_SCORE or the lookup fails.
+    """
+    if not artist:
+        return None  # nothing to search for; skip the rate-limited request
+
+    try:
+        query = f"artist:{_quote(artist)}"
+        for candidate in _search("artist", query, limit=3):
+            if candidate.get("score", 0) >= ARTIST_MIN_SCORE:
+                return {
+                    "name": candidate.get("name"),
+                    "mb_id": candidate.get("id"),
+                    "score": candidate.get("score"),
+                }
+    except Exception:
+        # Best-effort: a failed lookup is reported as "not found"
+        logger.warning(
+            "MusicBrainz artist lookup failed for %r", artist, exc_info=True
+        )
+    return None
diff --git a/backend/app/services/recommender.py b/backend/app/services/recommender.py
index ee4d0f2..fed9f9e 100644
--- a/backend/app/services/recommender.py
+++ b/backend/app/services/recommender.py
@@ -216,34 +216,46 @@ Return ONLY the JSON array, no other text."""
     except json.JSONDecodeError:
         return [], remaining
 
-    # Verify each recommendation exists on YouTube Music before saving
+    # Verify recommendations exist using MusicBrainz, get YouTube Music links
     import asyncio
-    from app.services.youtube_music import search_track
-    from difflib import SequenceMatcher
+    from app.services.musicbrainz import verify_track as mb_verify
+    from app.services.youtube_music import search_track as yt_search
 
-    def _normalize(s: str) -> str:
-        import re
-        return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip()
+    def verify_and_link(artist: str, title: str) -> dict | None:
+        """Verify track on MusicBrainz, then get YouTube Music link."""
+        # Step 1: Verify on MusicBrainz
+        mb_result = mb_verify(artist, title)
+        if not mb_result:
+            return None
 
-    def _sim(a: str, b: str) -> float:
-        return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio()
+        real_artist = mb_result["artist"]
+        real_title = mb_result["title"]
+        album = mb_result.get("album")
 
-    def verify_track(artist: str, title: str) -> dict | None:
-        """Search YouTube Music to verify a track exists and get the real data."""
+        # Step 2: Get YouTube Music link for listening
+        youtube_url = None
+        image_url = None
         try:
-            results = search_track(f"{artist} {title}")
-            for r in results[:3]:
-                r_artist = r.get("artist", "")
-                r_title = r.get("title", "")
-                # Check artist similarity (>0.5) and title similarity (>0.4)
-                if _sim(r_artist, artist) >= 0.5 and _sim(r_title, title) >= 0.4:
-                    return r
-                # Or very strong artist match with any title
-                if _sim(r_artist, artist) >= 0.8:
-                    return r
+            yt_results = yt_search(f"{real_artist} {real_title}")
+            if yt_results:
+                yt = yt_results[0]
+                yt_id = yt.get("youtube_id")
+                if yt_id:
+                    youtube_url = f"https://music.youtube.com/watch?v={yt_id}"
+                    image_url = yt.get("image_url")
         except Exception:
             pass
-        return None
+
+        if not youtube_url:
+            youtube_url = f"https://www.youtube.com/results?search_query={quote_plus(f'{real_artist} {real_title}')}"
+
+        return {
+            "artist": real_artist,
+            "title": real_title,
+            "album": album,
+            "youtube_url": youtube_url,
+            "image_url": image_url,
+        }
 
     # Save to DB — only keep verified tracks
     recommendations = []
@@ -255,35 +267,23 @@ Return ONLY the JSON array, no other text."""
         title = rec.get("title", "Unknown")
         reason = rec.get("reason", "")
 
-        # Verify on YouTube Music (run sync in thread)
-        verified = await asyncio.to_thread(verify_track, artist, title)
+        # Verify on MusicBrainz + get YouTube link (sync, run in thread)
+        verified = await asyncio.to_thread(verify_and_link, artist, title)
         if not verified:
-            continue  # Skip hallucinated songs
-
-        # Use verified data (correct artist/title from YouTube Music)
-        real_artist = verified.get("artist", artist)
-        real_title = verified.get("title", title)
-        youtube_id = verified.get("youtube_id")
-        image_url = verified.get("image_url")
-
-        # Direct YouTube link if we have a video ID, otherwise search
-        if youtube_id:
-            youtube_url = f"https://music.youtube.com/watch?v={youtube_id}"
-        else:
-            youtube_url = f"https://www.youtube.com/results?search_query={quote_plus(f'{real_artist} {real_title} official music video')}"
+            continue  # Song doesn't exist — AI hallucinated it
 
 
         r = Recommendation(
             user_id=user.id,
             playlist_id=playlist_id,
-            title=real_title,
-            artist=real_artist,
-            album=rec.get("album"),
-            image_url=image_url,
+            title=verified["title"],
+            artist=verified["artist"],
+            album=verified.get("album") or rec.get("album"),
+            image_url=verified.get("image_url"),
             reason=reason,
             score=rec.get("score"),
             query=query,
-            youtube_url=youtube_url,
+            youtube_url=verified["youtube_url"],
         )
         db.add(r)
         recommendations.append(r)