# cmm-report-analyzer/app/services/batch.py

from __future__ import annotations

import asyncio
import logging
import uuid
from dataclasses import dataclass, field
from pathlib import Path

from app.ai.summarizer import summarize
from app.analysis.charts import generate_charts
from app.analysis.spc import SPCResult, calculate_spc
from app.parsers.base import get_parser
from app.parsers.models import ParsedReport

logger = logging.getLogger(__name__)

# In-memory store: batch_id → BatchResult.
# process_batch() inserts an entry when a batch starts and get_batch() reads
# it back. Nothing visible in this module removes entries, so the dict keeps
# every batch for as long as the process lives.
_store: dict[str, BatchResult] = {}


@dataclass
class FileResult:
    filename: str
    report: dict
    spc: list[dict]
    charts: dict
    summary: str
    error: str | None = None

    def to_dict(self) -> dict:
        return {
            "filename": self.filename,
            "report": self.report,
            "spc": self.spc,
            "charts": self.charts,
            "summary": self.summary,
            "error": self.error,
        }


@dataclass
class BatchResult:
    """State of one batch: its id, a status string and the per-file results."""

    batch_id: str
    status: str = "processing"
    files: list[FileResult] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict view, converting each file via its ``to_dict``."""
        payload: dict = {"batch_id": self.batch_id, "status": self.status}
        payload["files"] = [entry.to_dict() for entry in self.files]
        return payload


async def process_batch(file_paths: list[Path]) -> str:
    """Process a batch of files concurrently and return the batch_id.

    A ``BatchResult`` is registered in the module-level store under a fresh
    12-character hex id before any file is processed, so the batch can be
    looked up while its status is still ``"processing"``. Every path is
    handed to ``_process_single``; the outcomes are appended to the batch in
    input order and the status is then set to ``"complete"``.

    Args:
        file_paths: Paths of the files to process.

    Returns:
        The id under which the batch was stored.
    """
    batch_id = uuid.uuid4().hex[:12]
    batch = BatchResult(batch_id=batch_id)
    _store[batch_id] = batch

    # Materialise once so results can be paired with their paths even if the
    # caller hands in a one-shot iterable.
    paths = list(file_paths)
    results = await asyncio.gather(
        *(_process_single(path) for path in paths),
        return_exceptions=True,
    )

    # gather() returns results in the order of its awaitables, so zip() lines
    # every result up with the path that produced it.
    for path, result in zip(paths, results):
        # BaseException, not Exception: with return_exceptions=True a
        # cancelled child shows up here as asyncio.CancelledError, which is a
        # BaseException subclass. Letting it through would put a
        # non-FileResult object into batch.files.
        if isinstance(result, BaseException):
            logger.error("File processing failed: %s", result, exc_info=result)
            batch.files.append(
                FileResult(
                    filename=path.name,
                    report={}, spc=[], charts={}, summary="",
                    # Some exceptions stringify to "" (e.g. CancelledError);
                    # fall back to the class name so the error is never blank.
                    error=str(result) or type(result).__name__,
                )
            )
        else:
            batch.files.append(result)

    batch.status = "complete"
    return batch_id


async def _process_single(path: Path) -> FileResult:
    """Run one file through parse → SPC → charts → AI summary.

    Any ``Exception`` raised along the way is logged with its traceback and
    turned into a ``FileResult`` that carries only the file name and the
    exception text in ``error``.
    """
    try:
        parsed = get_parser(path.name).parse(path)

        stats = calculate_spc(parsed.measurements)
        rendered = generate_charts(stats)
        narrative = await summarize(parsed, stats)

        return FileResult(
            filename=parsed.filename,
            report=parsed.to_dict(),
            spc=[item.to_dict() for item in stats],
            charts=rendered,
            summary=narrative,
        )
    except Exception as failure:
        logger.exception("Error processing %s", path.name)
        return FileResult(
            filename=path.name,
            report={},
            spc=[],
            charts={},
            summary="",
            error=str(failure),
        )


def get_batch(batch_id: str) -> BatchResult | None:
    """Look up a batch by id; return ``None`` when the id is not in the store."""
    try:
        return _store[batch_id]
    except KeyError:
        return None