# vynl/backend/app/services/recommender.py

import json
from datetime import datetime, timezone, timedelta
from urllib.parse import quote_plus

import anthropic
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import settings
from app.models.track import Track
from app.models.playlist import Playlist
from app.models.recommendation import Recommendation
from app.models.user import User


def build_taste_profile(tracks: list[Track]) -> dict:
    """Summarize a list of tracks into a taste profile.

    Args:
        tracks: Tracks to analyze. Each track is read for ``genres``,
            ``energy``, ``danceability``, ``valence``, ``tempo``, ``artist``
            and ``title``.

    Returns:
        An empty dict when ``tracks`` is empty. Otherwise a dict with:

        - ``top_genres``: up to 10 ``{"name", "count"}`` entries, most
          frequent first (ties keep first-seen order).
        - ``avg_energy`` / ``avg_danceability`` / ``avg_valence`` /
          ``avg_tempo``: means over the tracks whose ``energy`` is set.
        - ``track_count``: total number of tracks passed in.
        - ``sample_artists``: distinct artists of the first 20 tracks, in
          first-seen order.
        - ``sample_tracks``: ``"artist - title"`` for the first 15 tracks.
    """
    if not tracks:
        return {}

    genre_counts: dict[str, int] = {}
    energy_sum = 0.0
    dance_sum = 0.0
    valence_sum = 0.0
    tempo_sum = 0.0
    featured_tracks = 0

    for track in tracks:
        for genre in track.genres or ():
            genre_counts[genre] = genre_counts.get(genre, 0) + 1

        # ``energy`` gates the whole feature set: a track without it does not
        # contribute to any average, while a track with it counts toward all
        # four (missing sibling features are treated as 0).
        if track.energy is None:
            continue
        energy_sum += track.energy
        dance_sum += track.danceability or 0
        valence_sum += track.valence or 0
        tempo_sum += track.tempo or 0
        featured_tracks += 1

    # sorted() is stable, so equally frequent genres keep first-seen order.
    ranked_genres = sorted(genre_counts.items(), key=lambda item: item[1], reverse=True)[:10]

    # Avoid dividing by zero when no track carried audio features.
    divisor = max(featured_tracks, 1)

    # dict.fromkeys de-duplicates while preserving order. A plain set would
    # make the artist order vary between processes (string-hash
    # randomization), so the profile would not be reproducible.
    sample_artists = list(dict.fromkeys(track.artist for track in tracks[:20]))

    return {
        "top_genres": [{"name": name, "count": total} for name, total in ranked_genres],
        "avg_energy": round(energy_sum / divisor, 3),
        "avg_danceability": round(dance_sum / divisor, 3),
        "avg_valence": round(valence_sum / divisor, 3),
        "avg_tempo": round(tempo_sum / divisor, 1),
        "track_count": len(tracks),
        "sample_artists": sample_artists,
        "sample_tracks": [f"{track.artist} - {track.title}" for track in tracks[:15]],
    }


async def get_weekly_rec_count(db: AsyncSession, user_id: int) -> int:
    """Return how many recommendations the user has received this week.

    The week starts on Monday at 00:00 UTC. Used for rate limiting.
    """
    # Midnight (UTC) today, then step back to the Monday of the same week.
    today_midnight = datetime.now(timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    monday_midnight = today_midnight - timedelta(days=today_midnight.weekday())

    count_query = select(func.count(Recommendation.id)).where(
        Recommendation.user_id == user_id,
        Recommendation.created_at >= monday_midnight,
    )
    outcome = await db.execute(count_query)

    total = outcome.scalar()
    return total if total else 0


# Instruction text inserted into the LLM prompt for each discovery mode.
# Keys are the accepted values of the ``mode`` argument of
# ``generate_recommendations``; an unrecognized mode falls back to "discover".
MODE_PROMPTS = {
    "discover": "Find music they'll love. Mix well-known and underground artists.",
    "sonic_twin": "Find underground or lesser-known artists who sound nearly identical to their favorites. Focus on artists under 100K monthly listeners who share the same sonic qualities — similar vocal style, production approach, tempo, and energy.",
    "era_bridge": "Suggest classic artists from earlier eras who directly inspired their current favorites. Trace musical lineage — if they love Tame Impala, suggest the 70s psych rock that influenced him. Bridge eras.",
    "deep_cuts": "Find B-sides, album tracks, rarities, and lesser-known songs from artists already in their library. Focus on tracks they probably haven't heard even from artists they already know.",
    "rising": "Find artists with under 50K monthly listeners who match their taste. Focus on brand new, up-and-coming artists who haven't broken through yet. Think artists who just released their debut album or EP.",
}


def build_adventurousness_prompt(level: int) -> str:
    """Map an adventurousness level to the matching prompt instruction.

    Levels of 2 or below ask for close matches, exactly 3 asks for a
    balance, and anything else asks for adventurous picks.
    """
    if level == 3:
        return "Balance familiar and new. Mix artists similar to their taste with some that push boundaries."
    if level <= 2:
        return "Stick very close to their existing taste. Recommend artists who are very similar to what they already listen to."
    return "Be adventurous. Recommend artists that are different from their usual taste but share underlying qualities they'd appreciate. Push boundaries."


def _parse_recommendation_items(response_text: str) -> list[dict]:
    """Extract the recommendation objects from the model's raw reply.

    Tolerates a surrounding markdown code fence and leading/trailing prose.
    Returns an empty list when no JSON array can be recovered; entries that
    are not JSON objects are dropped.
    """
    text = response_text.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) and the closing fence.
        # partition() never raises, even when the reply has no newline.
        _, _, text = text.partition("\n")
        text = text.rsplit("```", 1)[0]

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost [...] span in case the model wrapped
        # the array in extra text.
        start, end = text.find("["), text.rfind("]")
        if start == -1 or end <= start:
            return []
        try:
            data = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return []

    if not isinstance(data, list):
        return []
    return [item for item in data if isinstance(item, dict)]


async def _load_library_context(
    db: AsyncSession,
    user: User,
    playlist_id: int | None,
) -> tuple[str, set[str]]:
    """Return the taste-profile prompt text and the user's known tracks.

    With a ``playlist_id`` only that playlist is used, and only if it belongs
    to ``user``; otherwise tracks from all of the user's playlists are used.
    Known tracks are lower-cased ``"artist - title"`` strings.
    """
    if playlist_id:
        result = await db.execute(
            select(Playlist).where(Playlist.id == playlist_id, Playlist.user_id == user.id)
        )
        playlist = result.scalar_one_or_none()
        if playlist is None:
            return "", set()

        result = await db.execute(select(Track).where(Track.playlist_id == playlist.id))
        tracks = list(result.scalars().all())
        profile = build_taste_profile(tracks)
        taste_context = f"Taste profile from playlist '{playlist.name}':\n{json.dumps(profile, indent=2)}"
    else:
        # One query for every track in any of the user's playlists, instead
        # of one query per playlist.
        owned_playlist_ids = select(Playlist.id).where(Playlist.user_id == user.id)
        result = await db.execute(
            select(Track).where(Track.playlist_id.in_(owned_playlist_ids))
        )
        tracks = list(result.scalars().all())
        taste_context = ""
        if tracks:
            profile = build_taste_profile(tracks)
            taste_context = f"Taste profile from {len(tracks)} tracks:\n{json.dumps(profile, indent=2)}"

    existing_tracks = {f"{t.artist} - {t.title}".lower() for t in tracks}
    return taste_context, existing_tracks


def _build_recommendation_prompt(
    *,
    taste_context: str,
    existing_tracks: set[str],
    disliked_artists: list[str],
    blocked_genres: str | None,
    query: str | None,
    bandcamp_mode: bool,
    mode: str,
    adventurousness: int,
    exclude: str | None,
    count: int,
) -> str:
    """Assemble the full prompt sent to the model."""
    user_request = query or "Find me music I'll love based on my taste profile. Prioritize lesser-known artists and hidden gems."

    if bandcamp_mode:
        focus_instruction = "IMPORTANT: Strongly prioritize independent and underground artists who release music on Bandcamp. Think DIY, indie labels, self-released artists, and the kind of music you'd find crate-digging on Bandcamp. Focus on artists who self-publish or release on small indie labels."
    else:
        focus_instruction = "Focus on discovery - prioritize lesser-known artists, deep cuts, and hidden gems over obvious popular choices."

    # Unknown modes fall back to the default "discover" instruction.
    mode_instruction = MODE_PROMPTS.get(mode, MODE_PROMPTS["discover"])
    adventurousness_instruction = build_adventurousness_prompt(adventurousness)

    # The user's stored blocked genres come first, then the per-request ones.
    combined_exclude = ", ".join(part for part in (blocked_genres, exclude) if part)
    exclude_instruction = ""
    if combined_exclude.strip():
        exclude_instruction = f"\nDo NOT recommend anything in these genres/moods: {combined_exclude}"

    disliked_instruction = ""
    if disliked_artists:
        # Capped at 30 names to keep the prompt short.
        disliked_instruction = f"\nDo NOT recommend anything by these artists (user disliked them): {', '.join(disliked_artists[:30])}"

    # Only 50 library entries go into the prompt; the caller filters the
    # model's answer against the full set afterwards.
    library_listing = ', '.join(list(existing_tracks)[:50]) if existing_tracks else 'None provided'

    return f"""You are Vynl, an AI music discovery assistant. You help people discover new music they'll love.

{taste_context}

User request: {user_request}

Discovery mode: {mode_instruction}

{adventurousness_instruction}

IMPORTANT: If the user mentions specific artists or songs in their request, do NOT recommend anything BY those artists. The user already knows them — recommend music by OTHER artists that match the vibe. For example, if they say "I like Sublime", recommend artists similar to Sublime, but NEVER Sublime themselves.

Already in their library (do NOT recommend these):
{library_listing}
{disliked_instruction}
{exclude_instruction}

Respond with exactly {count} music recommendations as a JSON array. Only recommend songs that actually exist — do not invent or guess song titles. Each item should have:
- "title": song title
- "artist": artist name
- "album": album name (if known)
- "reason": A warm, personal 2-3 sentence explanation of WHY they'll love this track. Reference specific qualities from their taste profile. Be specific about sonic qualities, not generic.
- "score": confidence score 0.0-1.0

{focus_instruction}
Return ONLY the JSON array, no other text."""


async def generate_recommendations(
    db: AsyncSession,
    user: User,
    playlist_id: int | None = None,
    query: str | None = None,
    bandcamp_mode: bool = False,
    mode: str = "discover",
    adventurousness: int = 3,
    exclude: str | None = None,
    count: int = 5,
) -> tuple[list[Recommendation], int | None]:
    """Generate AI music recommendations using Claude.

    Args:
        db: Session used to read playlists/tracks and to store the results.
        user: The requesting user; ``is_pro`` decides whether the weekly
            quota applies and ``blocked_genres`` is always excluded.
        playlist_id: Restrict the taste profile to this playlist (it must
            belong to ``user``); when omitted all of the user's playlists
            are used.
        query: Free-text request; a default discovery request is used when
            empty.
        bandcamp_mode: Steer the model toward independent/Bandcamp artists.
        mode: Key into ``MODE_PROMPTS``; unknown values fall back to
            ``"discover"``.
        adventurousness: Level passed to ``build_adventurousness_prompt``.
        exclude: Extra genres/moods to exclude for this request.
        count: Maximum number of recommendations to return. For free users
            it is capped at the remaining weekly quota.

    Returns:
        ``(recommendations, remaining)``. ``remaining`` is ``None`` for pro
        users and the weekly quota left after this call for free users.
        ``([], 0)`` is returned when the quota is already used up, and an
        empty list when the model's reply cannot be parsed. Returned
        ``Recommendation`` objects are added to ``db`` and flushed; this
        function does not commit.
    """
    # Rate limit check for free users
    remaining = None
    if not user.is_pro:
        used_this_week = await get_weekly_rec_count(db, user.id)
        remaining = max(0, settings.FREE_WEEKLY_RECOMMENDATIONS - used_this_week)
        if remaining <= 0:
            return [], 0
        # Never hand out more than the quota still allows.
        count = min(count, remaining)

    if count <= 0:
        # Nothing was requested; skip the DB work and the API call.
        return [], remaining

    taste_context, existing_tracks = await _load_library_context(db, user, playlist_id)

    # Load disliked artists to exclude
    disliked_result = await db.execute(
        select(Recommendation.artist).where(
            Recommendation.user_id == user.id,
            Recommendation.disliked == True,  # noqa: E712 - SQL expression, not a Python comparison
        )
    )
    disliked_artists = list({a for a in disliked_result.scalars().all() if a})

    prompt = _build_recommendation_prompt(
        taste_context=taste_context,
        existing_tracks=existing_tracks,
        disliked_artists=disliked_artists,
        blocked_genres=user.blocked_genres,
        query=query,
        bandcamp_mode=bandcamp_mode,
        mode=mode,
        adventurousness=adventurousness,
        exclude=exclude,
        count=count,
    )

    # Use the async client: the synchronous one would block the event loop
    # for the whole duration of the API request.
    client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
    message = await client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}],
    )

    # Take the first text block; an empty reply parses to "no recommendations".
    response_text = next(
        (block.text for block in message.content if getattr(block, "type", None) == "text"),
        "",
    )
    items = _parse_recommendation_items(response_text)

    # Save to DB with YouTube Music links
    disliked_lookup = {name.lower() for name in disliked_artists}
    recommendations: list[Recommendation] = []
    for item in items:
        if len(recommendations) >= count:
            break

        # ``or`` also covers explicit JSON nulls, which dict.get's default
        # would let through as None.
        artist = str(item.get("artist") or "Unknown")
        title = str(item.get("title") or "Unknown")
        reason = item.get("reason") or ""

        # The prompt only asks the model to avoid these; enforce it here,
        # against the full library rather than the 50 entries it was shown.
        if f"{artist} - {title}".lower() in existing_tracks:
            continue
        if artist.lower() in disliked_lookup:
            continue

        # Store a score only when the model returned a real number.
        score = item.get("score")
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            score = None

        youtube_url = f"https://music.youtube.com/search?q={quote_plus(f'{artist} {title}')}"

        rec = Recommendation(
            user_id=user.id,
            playlist_id=playlist_id,
            title=title,
            artist=artist,
            album=item.get("album"),
            reason=reason,
            score=score,
            query=query,
            youtube_url=youtube_url,
        )
        db.add(rec)
        recommendations.append(rec)

    await db.flush()

    if remaining is not None:
        remaining = max(0, remaining - len(recommendations))

    return recommendations, remaining