Switch to MusicBrainz for song verification, YouTube Music for playback links
This commit is contained in:
104
backend/app/services/musicbrainz.py
Normal file
104
backend/app/services/musicbrainz.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
"""MusicBrainz API client for verifying songs exist."""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Root of the MusicBrainz web service (version 2); endpoint names such as
# "/recording" and "/artist" are appended to it by the lookup functions.
MB_API_URL = "https://musicbrainz.org/ws/2"

# Headers passed with every MusicBrainz request made by this module: a
# User-Agent naming the application with a contact address, and a JSON Accept
# type.
HEADERS = {
    "User-Agent": "Vynl/1.0 (chris.ryan@deepcutsai.com)",
    "Accept": "application/json",
}
|
||||||
|
|
||||||
|
# MusicBrainz rate limit: 1 request per second
import threading
import time

# Minimum spacing between two requests, in seconds (1 s limit plus a margin).
_MIN_REQUEST_INTERVAL = 1.1

# time.monotonic() reading taken when the previous request slot was granted.
_last_request_time = 0.0

# Guards _last_request_time. Callers may run in worker threads (for example
# through asyncio.to_thread); without the lock two threads could read the same
# timestamp and both send a request immediately.
_rate_limit_lock = threading.Lock()


def _rate_limit():
    """Block until the minimum request interval has elapsed since the last call.

    The wait is measured with the monotonic clock so that wall-clock
    adjustments can neither skip the delay nor extend it. The lock is held
    while sleeping on purpose: concurrent callers queue up and are released
    one interval apart.
    """
    global _last_request_time
    with _rate_limit_lock:
        elapsed = time.monotonic() - _last_request_time
        if elapsed < _MIN_REQUEST_INTERVAL:
            time.sleep(_MIN_REQUEST_INTERVAL - elapsed)
        _last_request_time = time.monotonic()
|
||||||
|
|
||||||
|
|
||||||
|
def verify_track(artist: str, title: str, *, min_score: int = 70) -> dict | None:
    """Look up a recording on MusicBrainz and return its canonical data.

    Waits for the module rate limiter, sends one search request for up to
    five candidates, and returns the first candidate whose match score
    reaches ``min_score``.

    Args:
        artist: Artist name to search for.
        title: Recording title to search for.
        min_score: Lowest match score accepted as a hit.

    Returns:
        A dict with the keys ``artist`` (name of the first credited artist,
        or ``""``), ``title``, ``album`` (title of the first listed release,
        or ``None``), ``mb_id`` and ``score``. ``None`` when no candidate
        qualifies, the response status is not 200, or the request or JSON
        decoding fails.
    """
    # Both values are embedded in a double-quoted Lucene phrase. Escape
    # backslashes and double quotes so a name such as '"Heroes"' cannot end
    # the phrase early and corrupt the query.
    safe_artist = artist.replace("\\", "\\\\").replace('"', '\\"')
    safe_title = title.replace("\\", "\\\\").replace('"', '\\"')

    _rate_limit()
    try:
        resp = httpx.get(
            f"{MB_API_URL}/recording",
            params={
                "query": f'artist:"{safe_artist}" recording:"{safe_title}"',
                "fmt": "json",
                "limit": 5,
            },
            headers=HEADERS,
            timeout=10,
        )
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (httpx.HTTPError, ValueError):
        # Best-effort lookup: transport failures and undecodable bodies are
        # reported as "not found" instead of propagating to the caller.
        return None

    recordings = data.get("recordings") if isinstance(data, dict) else None
    if not isinstance(recordings, list):
        return None

    for rec in recordings:
        if not isinstance(rec, dict):
            continue
        score = rec.get("score", 0)
        if not isinstance(score, (int, float)) or score < min_score:
            continue

        artist_credits = rec.get("artist-credit") or []
        releases = rec.get("releases") or []
        first_credit = artist_credits[0] if artist_credits else None
        first_release = releases[0] if releases else None

        return {
            # Only the first credited artist is reported.
            "artist": first_credit.get("name", "") if isinstance(first_credit, dict) else "",
            "title": rec.get("title", ""),
            # Album is taken from the first release listed for the recording.
            "album": first_release.get("title") if isinstance(first_release, dict) else None,
            "mb_id": rec.get("id"),
            "score": score,
        }

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def search_artist(artist: str, *, min_score: int = 80) -> dict | None:
    """Look up an artist on MusicBrainz.

    Waits for the module rate limiter, sends one search request for up to
    three candidates, and returns the first candidate whose match score
    reaches ``min_score``.

    Args:
        artist: Artist name to search for.
        min_score: Lowest match score accepted as a hit.

    Returns:
        A dict with the keys ``name``, ``mb_id`` and ``score``. ``None`` when
        no candidate qualifies, the response status is not 200, or the
        request or JSON decoding fails.
    """
    # The name is embedded in a double-quoted Lucene phrase; escape
    # backslashes and double quotes so they cannot break the query.
    safe_artist = artist.replace("\\", "\\\\").replace('"', '\\"')

    _rate_limit()
    try:
        resp = httpx.get(
            f"{MB_API_URL}/artist",
            params={
                "query": f'artist:"{safe_artist}"',
                "fmt": "json",
                "limit": 3,
            },
            headers=HEADERS,
            timeout=10,
        )
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (httpx.HTTPError, ValueError):
        # Best-effort lookup: transport failures and undecodable bodies are
        # reported as "not found" instead of propagating to the caller.
        return None

    artists = data.get("artists") if isinstance(data, dict) else None
    if not isinstance(artists, list):
        return None

    for candidate in artists:
        if not isinstance(candidate, dict):
            continue
        score = candidate.get("score", 0)
        if isinstance(score, (int, float)) and score >= min_score:
            return {
                "name": candidate.get("name"),
                "mb_id": candidate.get("id"),
                "score": score,
            }

    return None
|
||||||
@@ -216,34 +216,46 @@ Return ONLY the JSON array, no other text."""
|
|||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return [], remaining
|
return [], remaining
|
||||||
|
|
||||||
# Verify each recommendation exists on YouTube Music before saving
|
# Verify recommendations exist using MusicBrainz, get YouTube Music links
|
||||||
import asyncio
|
import asyncio
|
||||||
from app.services.youtube_music import search_track
|
from app.services.musicbrainz import verify_track as mb_verify
|
||||||
from difflib import SequenceMatcher
|
from app.services.youtube_music import search_track as yt_search
|
||||||
|
|
||||||
def _normalize(s: str) -> str:
|
def verify_and_link(artist: str, title: str) -> dict | None:
|
||||||
import re
|
"""Verify track on MusicBrainz, then get YouTube Music link."""
|
||||||
return re.sub(r'[^a-z0-9\s]', '', s.lower()).strip()
|
# Step 1: Verify on MusicBrainz
|
||||||
|
mb_result = mb_verify(artist, title)
|
||||||
|
if not mb_result:
|
||||||
|
return None
|
||||||
|
|
||||||
def _sim(a: str, b: str) -> float:
|
real_artist = mb_result["artist"]
|
||||||
return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio()
|
real_title = mb_result["title"]
|
||||||
|
album = mb_result.get("album")
|
||||||
|
|
||||||
def verify_track(artist: str, title: str) -> dict | None:
|
# Step 2: Get YouTube Music link for listening
|
||||||
"""Search YouTube Music to verify a track exists and get the real data."""
|
youtube_url = None
|
||||||
|
image_url = None
|
||||||
try:
|
try:
|
||||||
results = search_track(f"{artist} {title}")
|
yt_results = yt_search(f"{real_artist} {real_title}")
|
||||||
for r in results[:3]:
|
if yt_results:
|
||||||
r_artist = r.get("artist", "")
|
yt = yt_results[0]
|
||||||
r_title = r.get("title", "")
|
yt_id = yt.get("youtube_id")
|
||||||
# Check artist similarity (>0.5) and title similarity (>0.4)
|
if yt_id:
|
||||||
if _sim(r_artist, artist) >= 0.5 and _sim(r_title, title) >= 0.4:
|
youtube_url = f"https://music.youtube.com/watch?v={yt_id}"
|
||||||
return r
|
image_url = yt.get("image_url")
|
||||||
# Or very strong artist match with any title
|
|
||||||
if _sim(r_artist, artist) >= 0.8:
|
|
||||||
return r
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return None
|
|
||||||
|
if not youtube_url:
|
||||||
|
youtube_url = f"https://www.youtube.com/results?search_query={quote_plus(f'{real_artist} {real_title}')}"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"artist": real_artist,
|
||||||
|
"title": real_title,
|
||||||
|
"album": album,
|
||||||
|
"youtube_url": youtube_url,
|
||||||
|
"image_url": image_url,
|
||||||
|
}
|
||||||
|
|
||||||
# Save to DB — only keep verified tracks
|
# Save to DB — only keep verified tracks
|
||||||
recommendations = []
|
recommendations = []
|
||||||
@@ -255,35 +267,23 @@ Return ONLY the JSON array, no other text."""
|
|||||||
title = rec.get("title", "Unknown")
|
title = rec.get("title", "Unknown")
|
||||||
reason = rec.get("reason", "")
|
reason = rec.get("reason", "")
|
||||||
|
|
||||||
# Verify on YouTube Music (run sync in thread)
|
# Verify on MusicBrainz + get YouTube link (sync, run in thread)
|
||||||
verified = await asyncio.to_thread(verify_track, artist, title)
|
verified = await asyncio.to_thread(verify_and_link, artist, title)
|
||||||
|
|
||||||
if not verified:
|
if not verified:
|
||||||
continue # Skip hallucinated songs
|
continue # Song doesn't exist — AI hallucinated it
|
||||||
|
|
||||||
# Use verified data (correct artist/title from YouTube Music)
|
|
||||||
real_artist = verified.get("artist", artist)
|
|
||||||
real_title = verified.get("title", title)
|
|
||||||
youtube_id = verified.get("youtube_id")
|
|
||||||
image_url = verified.get("image_url")
|
|
||||||
|
|
||||||
# Direct YouTube link if we have a video ID, otherwise search
|
|
||||||
if youtube_id:
|
|
||||||
youtube_url = f"https://music.youtube.com/watch?v={youtube_id}"
|
|
||||||
else:
|
|
||||||
youtube_url = f"https://www.youtube.com/results?search_query={quote_plus(f'{real_artist} {real_title} official music video')}"
|
|
||||||
|
|
||||||
r = Recommendation(
|
r = Recommendation(
|
||||||
user_id=user.id,
|
user_id=user.id,
|
||||||
playlist_id=playlist_id,
|
playlist_id=playlist_id,
|
||||||
title=real_title,
|
title=verified["title"],
|
||||||
artist=real_artist,
|
artist=verified["artist"],
|
||||||
album=rec.get("album"),
|
album=verified.get("album") or rec.get("album"),
|
||||||
image_url=image_url,
|
image_url=verified.get("image_url"),
|
||||||
reason=reason,
|
reason=reason,
|
||||||
score=rec.get("score"),
|
score=rec.get("score"),
|
||||||
query=query,
|
query=query,
|
||||||
youtube_url=youtube_url,
|
youtube_url=verified["youtube_url"],
|
||||||
)
|
)
|
||||||
db.add(r)
|
db.add(r)
|
||||||
recommendations.append(r)
|
recommendations.append(r)
|
||||||
|
|||||||
Reference in New Issue
Block a user