Switch to MusicBrainz for song verification, YouTube Music for playback links

This commit is contained in:
root
2026-03-31 10:15:32 -05:00
parent 9f9f9581d6
commit 85d4210a21
2 changed files with 145 additions and 41 deletions

View File

@@ -0,0 +1,104 @@
"""MusicBrainz API client for verifying songs exist."""
import httpx
# Base URL of the MusicBrainz web service; endpoint names such as
# "/recording" and "/artist" are appended to it by the functions below.
MB_API_URL = "https://musicbrainz.org/ws/2"
# Headers sent with every request: a User-Agent identifying the application
# with a contact address, and an Accept header asking for JSON.
HEADERS = {
"User-Agent": "Vynl/1.0 (chris.ryan@deepcutsai.com)",
"Accept": "application/json",
}
# MusicBrainz rate limit: 1 request per second
import time
# Time of the most recent request as recorded by _rate_limit(); kept at
# module level so the value persists between calls.
_last_request_time = 0.0
def _rate_limit():
    """Block until at least 1.1 seconds have passed since the previous call.

    MusicBrainz allows one request per second; the extra 0.1 s is a safety
    margin. The interval is measured with ``time.monotonic()`` rather than
    ``time.time()`` so that a wall-clock adjustment (NTP step, manual change)
    cannot make the elapsed time negative and turn the sleep below into an
    arbitrarily long stall.

    Side effect: updates the module-level ``_last_request_time``.

    NOTE: the read-sleep-write sequence is not synchronized; callers running
    in several threads at once may pass through together.
    """
    global _last_request_time
    min_interval = 1.1
    elapsed = time.monotonic() - _last_request_time
    if elapsed < min_interval:
        time.sleep(min_interval - elapsed)
    # Stamp after any sleep so the next caller measures from the moment this
    # request is actually about to be sent.
    _last_request_time = time.monotonic()
def verify_track(artist: str, title: str) -> dict | None:
    """Verify a track exists on MusicBrainz and return canonical data.

    Queries the ``/recording`` search endpoint for recordings matching both
    the artist and the title (at most 5 candidates) and returns the first
    candidate whose search score is 70 or higher.

    Args:
        artist: Artist name to search for.
        title: Track title to search for.

    Returns:
        A dict with keys ``artist``, ``title``, ``album``, ``mb_id`` and
        ``score``, or ``None`` if no candidate scores high enough, the
        response status is not 200, or the lookup fails for any reason.
        ``album`` is the title of the first listed release, or ``None`` when
        the recording lists no releases.
    """

    def _phrase(value: str) -> str:
        # Escape the two characters that are significant inside a quoted
        # Lucene phrase. Backslash goes first so the backslashes added for
        # the quotes are not themselves doubled.
        return value.replace("\\", "\\\\").replace('"', '\\"')

    _rate_limit()
    try:
        resp = httpx.get(
            f"{MB_API_URL}/recording",
            params={
                # Without escaping, a title such as '"Heroes"' would close
                # the phrase early and break or change the query.
                "query": f'artist:"{_phrase(artist)}" recording:"{_phrase(title)}"',
                "fmt": "json",
                "limit": 5,
            },
            headers=HEADERS,
            timeout=10,
        )
        if resp.status_code != 200:
            return None
        data = resp.json()
        recordings = data.get("recordings", [])
        for rec in recordings:
            score = rec.get("score", 0)
            if score < 70:
                continue
            rec_title = rec.get("title", "")
            rec_artists = rec.get("artist-credit", [])
            # Only the first credited artist is reported.
            rec_artist = rec_artists[0]["name"] if rec_artists else ""
            # Get album from first release
            album = None
            releases = rec.get("releases", [])
            if releases:
                album = releases[0].get("title")
            return {
                "artist": rec_artist,
                "title": rec_title,
                "album": album,
                "mb_id": rec.get("id"),
                "score": score,
            }
        return None
    except Exception:
        # Deliberate best-effort boundary: any network, decoding or
        # response-shape problem is reported to the caller as "not found".
        return None
def search_artist(artist: str) -> dict | None:
    """Verify an artist exists on MusicBrainz.

    Queries the ``/artist`` search endpoint (at most 3 candidates) and
    returns the first candidate whose search score is 80 or higher.

    Args:
        artist: Artist name to search for.

    Returns:
        A dict with keys ``name``, ``mb_id`` and ``score``, or ``None`` if no
        candidate scores high enough, the response status is not 200, or the
        lookup fails for any reason.
    """
    # Escape the two characters that are significant inside a quoted Lucene
    # phrase; otherwise a name containing '"' would close the phrase early.
    # Backslash goes first so the added backslashes are not doubled.
    phrase = artist.replace("\\", "\\\\").replace('"', '\\"')

    _rate_limit()
    try:
        resp = httpx.get(
            f"{MB_API_URL}/artist",
            params={
                "query": f'artist:"{phrase}"',
                "fmt": "json",
                "limit": 3,
            },
            headers=HEADERS,
            timeout=10,
        )
        if resp.status_code != 200:
            return None
        data = resp.json()
        artists = data.get("artists", [])
        for a in artists:
            if a.get("score", 0) >= 80:
                return {
                    "name": a.get("name"),
                    "mb_id": a.get("id"),
                    "score": a.get("score"),
                }
        return None
    except Exception:
        # Deliberate best-effort boundary: any network, decoding or
        # response-shape problem is reported to the caller as "not found".
        return None

View File

@@ -216,34 +216,46 @@ Return ONLY the JSON array, no other text."""
except json.JSONDecodeError: except json.JSONDecodeError:
return [], remaining return [], remaining
# Verify each recommendation exists on YouTube Music before saving # Verify recommendations exist using MusicBrainz, get YouTube Music links
import asyncio import asyncio
from app.services.youtube_music import search_track from app.services.musicbrainz import verify_track as mb_verify
from difflib import SequenceMatcher from app.services.youtube_music import search_track as yt_search
def _normalize(s: str) -> str: def verify_and_link(artist: str, title: str) -> dict | None:
import re """Verify track on MusicBrainz, then get YouTube Music link."""
return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip() # Step 1: Verify on MusicBrainz
mb_result = mb_verify(artist, title)
if not mb_result:
return None
def _sim(a: str, b: str) -> float: real_artist = mb_result["artist"]
return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio() real_title = mb_result["title"]
album = mb_result.get("album")
def verify_track(artist: str, title: str) -> dict | None: # Step 2: Get YouTube Music link for listening
"""Search YouTube Music to verify a track exists and get the real data.""" youtube_url = None
image_url = None
try: try:
results = search_track(f"{artist} {title}") yt_results = yt_search(f"{real_artist} {real_title}")
for r in results[:3]: if yt_results:
r_artist = r.get("artist", "") yt = yt_results[0]
r_title = r.get("title", "") yt_id = yt.get("youtube_id")
# Check artist similarity (>0.5) and title similarity (>0.4) if yt_id:
if _sim(r_artist, artist) >= 0.5 and _sim(r_title, title) >= 0.4: youtube_url = f"https://music.youtube.com/watch?v={yt_id}"
return r image_url = yt.get("image_url")
# Or very strong artist match with any title
if _sim(r_artist, artist) >= 0.8:
return r
except Exception: except Exception:
pass pass
return None
if not youtube_url:
youtube_url = f"https://www.youtube.com/results?search_query={quote_plus(f'{real_artist} {real_title}')}"
return {
"artist": real_artist,
"title": real_title,
"album": album,
"youtube_url": youtube_url,
"image_url": image_url,
}
# Save to DB — only keep verified tracks # Save to DB — only keep verified tracks
recommendations = [] recommendations = []
@@ -255,35 +267,23 @@ Return ONLY the JSON array, no other text."""
title = rec.get("title", "Unknown") title = rec.get("title", "Unknown")
reason = rec.get("reason", "") reason = rec.get("reason", "")
# Verify on YouTube Music (run sync in thread) # Verify on MusicBrainz + get YouTube link (sync, run in thread)
verified = await asyncio.to_thread(verify_track, artist, title) verified = await asyncio.to_thread(verify_and_link, artist, title)
if not verified: if not verified:
continue # Skip hallucinated songs continue # Song doesn't exist — AI hallucinated it
# Use verified data (correct artist/title from YouTube Music)
real_artist = verified.get("artist", artist)
real_title = verified.get("title", title)
youtube_id = verified.get("youtube_id")
image_url = verified.get("image_url")
# Direct YouTube link if we have a video ID, otherwise search
if youtube_id:
youtube_url = f"https://music.youtube.com/watch?v={youtube_id}"
else:
youtube_url = f"https://www.youtube.com/results?search_query={quote_plus(f'{real_artist} {real_title} official music video')}"
r = Recommendation( r = Recommendation(
user_id=user.id, user_id=user.id,
playlist_id=playlist_id, playlist_id=playlist_id,
title=real_title, title=verified["title"],
artist=real_artist, artist=verified["artist"],
album=rec.get("album"), album=verified.get("album") or rec.get("album"),
image_url=image_url, image_url=verified.get("image_url"),
reason=reason, reason=reason,
score=rec.get("score"), score=rec.get("score"),
query=query, query=query,
youtube_url=youtube_url, youtube_url=verified["youtube_url"],
) )
db.add(r) db.add(r)
recommendations.append(r) recommendations.append(r)