commit 9abf9b4b58fe7a50d2fbcc2a18567d4af0c9094d Author: chrisryn Date: Thu Feb 19 10:38:51 2026 -0600 Initial commit: CMM Report Analyzer FastAPI app that parses CMM inspection reports (PDF/Excel/CSV), computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk), generates interactive Plotly charts, and provides AI-powered quality summaries via Azure OpenAI with graceful fallback. Includes 21 passing tests covering parsers, SPC calculations, and API endpoints. diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d531d44 --- /dev/null +++ b/.env.example @@ -0,0 +1,4 @@ +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com +AZURE_OPENAI_API_KEY=your-key-here +AZURE_OPENAI_DEPLOYMENT=gpt-4o +AZURE_OPENAI_API_VERSION=2024-10-21 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60ee068 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +__pycache__/ +*.py[cod] +*.egg-info/ +dist/ +build/ +.env +.venv/ +venv/ +*.tmp +uploads/ diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/ai/__init__.py b/app/ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/ai/summarizer.py b/app/ai/summarizer.py new file mode 100644 index 0000000..3464804 --- /dev/null +++ b/app/ai/summarizer.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import logging + +from openai import AsyncAzureOpenAI + +from app.analysis.spc import SPCResult +from app.config import settings +from app.parsers.models import ParsedReport + +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT = """\ +You are a quality engineer reviewing CMM (Coordinate Measuring Machine) inspection data. +Provide a concise, actionable summary that includes: +1. Overall pass/fail assessment +2. Features of concern (low Cpk, out-of-tolerance, trends) +3. Root-cause hypotheses for any deviations +4. Recommended corrective actions +Use precise engineering language. 
Reference feature names and numeric values.""" + + +async def summarize( + report: ParsedReport, spc_results: list[SPCResult] +) -> str: + """Generate an AI-powered quality summary. Returns fallback text on failure.""" + if not settings.azure_openai_endpoint or not settings.azure_openai_api_key: + return _fallback_summary(report, spc_results) + + spc_text = _format_spc(spc_results) + user_msg = ( + f"File: {report.filename}\n" + f"Measurements: {len(report.measurements)}, " + f"Out of tolerance: {len(report.out_of_tolerance)}\n\n" + f"SPC Results:\n{spc_text}\n\n" + f"Raw report excerpt:\n{report.raw_text[:3000]}" + ) + + try: + client = AsyncAzureOpenAI( + azure_endpoint=settings.azure_openai_endpoint, + api_key=settings.azure_openai_api_key, + api_version=settings.azure_openai_api_version, + ) + response = await client.chat.completions.create( + model=settings.azure_openai_deployment, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": user_msg}, + ], + temperature=0.3, + max_tokens=1024, + ) + return response.choices[0].message.content or _fallback_summary( + report, spc_results + ) + except Exception: + logger.exception("Azure OpenAI call failed, using fallback summary") + return _fallback_summary(report, spc_results) + + +def _format_spc(results: list[SPCResult]) -> str: + lines: list[str] = [] + for r in results: + cpk_str = f"{r.cpk:.3f}" if r.cpk is not None else "N/A" + ppk_str = f"{r.ppk:.3f}" if r.ppk is not None else "N/A" + lines.append( + f" {r.feature_name}: n={r.n}, mean={r.mean:.4f}, " + f"Cpk={cpk_str}, Ppk={ppk_str}, OOS={r.out_of_spec_count}" + ) + return "\n".join(lines) if lines else " No SPC data available." 
+ + +def _fallback_summary(report: ParsedReport, spc_results: list[SPCResult]) -> str: + total = len(report.measurements) + oot = len(report.out_of_tolerance) + status = "PASS" if oot == 0 else "FAIL" + + lines = [ + f"**Inspection Summary for {report.filename}**", + f"Status: **{status}** — {total} measurements, {oot} out of tolerance.", + "", + ] + if oot > 0: + lines.append("Out-of-tolerance features:") + for m in report.out_of_tolerance: + lines.append( + f" - {m.feature_name}: actual={m.actual:.4f}, " + f"nominal={m.nominal:.4f}, tolerance=[{m.lsl:.4f}, {m.usl:.4f}]" + ) + lines.append("") + + for r in spc_results: + if r.cpk is not None and r.cpk < 1.0: + lines.append( + f" Warning: {r.feature_name} Cpk={r.cpk:.3f} (below 1.0)" + ) + + lines.append("") + lines.append("*(AI summary unavailable — configure Azure OpenAI for enhanced analysis)*") + return "\n".join(lines) diff --git a/app/analysis/__init__.py b/app/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/analysis/charts.py b/app/analysis/charts.py new file mode 100644 index 0000000..dcccde7 --- /dev/null +++ b/app/analysis/charts.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import plotly.graph_objects as go + +from app.analysis.spc import SPCResult + + +def histogram(result: SPCResult) -> dict: + """Distribution histogram with spec limits overlay.""" + fig = go.Figure() + fig.add_trace(go.Histogram(x=result.values, name="Measurements", nbinsx=20)) + fig.add_vline(x=result.usl, line_dash="dash", line_color="red", + annotation_text="USL") + fig.add_vline(x=result.lsl, line_dash="dash", line_color="red", + annotation_text="LSL") + fig.add_vline(x=result.nominal, line_dash="dot", line_color="green", + annotation_text="Nominal") + fig.update_layout( + title=f"Distribution – {result.feature_name}", + xaxis_title="Value", yaxis_title="Count", + template="plotly_white", height=350, + ) + return fig.to_plotly_json() + + +def control_chart(result: SPCResult) -> dict: 
+ """Individual values control chart (I-chart).""" + x_axis = list(range(1, result.n + 1)) + fig = go.Figure() + fig.add_trace(go.Scatter( + x=x_axis, y=result.values, mode="lines+markers", name="Value", + )) + fig.add_hline(y=result.mean, line_color="green", annotation_text="Mean") + fig.add_hline(y=result.ucl, line_dash="dash", line_color="red", + annotation_text="UCL") + fig.add_hline(y=result.lcl, line_dash="dash", line_color="red", + annotation_text="LCL") + fig.add_hline(y=result.usl, line_dash="dot", line_color="orange", + annotation_text="USL") + fig.add_hline(y=result.lsl, line_dash="dot", line_color="orange", + annotation_text="LSL") + fig.update_layout( + title=f"Control Chart – {result.feature_name}", + xaxis_title="Sample #", yaxis_title="Value", + template="plotly_white", height=350, + ) + return fig.to_plotly_json() + + +def capability_bar(results: list[SPCResult]) -> dict: + """Capability index bar chart comparing all features.""" + names = [r.feature_name for r in results] + cpk_vals = [r.cpk if r.cpk is not None else 0.0 for r in results] + ppk_vals = [r.ppk if r.ppk is not None else 0.0 for r in results] + + colors = ["#2ecc71" if v >= 1.33 else "#f39c12" if v >= 1.0 else "#e74c3c" + for v in cpk_vals] + + fig = go.Figure() + fig.add_trace(go.Bar(x=names, y=cpk_vals, name="Cpk", marker_color=colors)) + fig.add_trace(go.Bar(x=names, y=ppk_vals, name="Ppk", marker_color="rgba(52,152,219,0.6)")) + fig.add_hline(y=1.33, line_dash="dash", line_color="green", + annotation_text="Cpk=1.33") + fig.add_hline(y=1.0, line_dash="dot", line_color="orange", + annotation_text="Cpk=1.0") + fig.update_layout( + title="Process Capability Summary", + xaxis_title="Feature", yaxis_title="Index", + barmode="group", template="plotly_white", height=400, + ) + return fig.to_plotly_json() + + +def generate_charts(results: list[SPCResult]) -> dict: + """Generate all charts for a set of SPC results.""" + charts: dict[str, list[dict] | dict] = { + "histograms": [], + 
"control_charts": [], + } + for r in results: + if r.n >= 2: + charts["histograms"].append(histogram(r)) + charts["control_charts"].append(control_chart(r)) + if results: + charts["capability_bar"] = capability_bar(results) + return charts diff --git a/app/analysis/spc.py b/app/analysis/spc.py new file mode 100644 index 0000000..bd52cfe --- /dev/null +++ b/app/analysis/spc.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +from scipy import stats + +from app.parsers.models import MeasurementRecord + +# d2 constants for subgroup sizes 2–10 (R-bar/d2 method) +_D2 = {2: 1.128, 3: 1.693, 4: 2.059, 5: 2.326, 6: 2.534, 7: 2.704, 8: 2.847, 9: 2.970, 10: 3.078} + + +@dataclass +class SPCResult: + feature_name: str + n: int + mean: float + std: float + min_val: float + max_val: float + usl: float + lsl: float + nominal: float + cp: float | None + cpk: float | None + pp: float | None + ppk: float | None + ucl: float + lcl: float + out_of_spec_count: int + shapiro_p: float | None + values: list[float] + + def to_dict(self) -> dict: + return { + "feature_name": self.feature_name, + "n": self.n, + "mean": round(self.mean, 6), + "std": round(self.std, 6), + "min": round(self.min_val, 6), + "max": round(self.max_val, 6), + "usl": round(self.usl, 6), + "lsl": round(self.lsl, 6), + "nominal": round(self.nominal, 6), + "cp": _r(self.cp), + "cpk": _r(self.cpk), + "pp": _r(self.pp), + "ppk": _r(self.ppk), + "ucl": round(self.ucl, 6), + "lcl": round(self.lcl, 6), + "out_of_spec_count": self.out_of_spec_count, + "shapiro_p": _r(self.shapiro_p), + "values": [round(v, 6) for v in self.values], + } + + +def _r(v: float | None) -> float | None: + return round(v, 4) if v is not None else None + + +def calculate_spc( + records: list[MeasurementRecord], subgroup_size: int = 5 +) -> list[SPCResult]: + """Calculate SPC metrics grouped by feature name.""" + groups: dict[str, list[MeasurementRecord]] = {} + for rec in records: + 
groups.setdefault(rec.feature_name, []).append(rec) + + results: list[SPCResult] = [] + for name, recs in groups.items(): + values = [r.actual for r in recs] + n = len(values) + if n < 1: + continue + + arr = np.array(values) + mean = float(np.mean(arr)) + usl = recs[0].usl + lsl = recs[0].lsl + nominal = recs[0].nominal + + if n < 2: + results.append( + SPCResult( + feature_name=name, n=n, mean=mean, std=0.0, + min_val=values[0], max_val=values[0], + usl=usl, lsl=lsl, nominal=nominal, + cp=None, cpk=None, pp=None, ppk=None, + ucl=mean, lcl=mean, + out_of_spec_count=sum(1 for v in values if v < lsl or v > usl), + shapiro_p=None, values=values, + ) + ) + continue + + std_overall = float(np.std(arr, ddof=1)) + tol_range = usl - lsl + + # Pp, Ppk (overall) + pp = tol_range / (6 * std_overall) if std_overall > 0 else None + ppk = ( + min((usl - mean), (mean - lsl)) / (3 * std_overall) + if std_overall > 0 + else None + ) + + # Cp, Cpk (within-subgroup using R-bar/d2) + std_within = _within_subgroup_sigma(arr, subgroup_size) + cp = tol_range / (6 * std_within) if std_within and std_within > 0 else None + cpk = ( + min((usl - mean), (mean - lsl)) / (3 * std_within) + if std_within and std_within > 0 + else None + ) + + # Control limits (X-bar chart, 3-sigma) + ucl = mean + 3 * std_overall + lcl = mean - 3 * std_overall + + # Shapiro-Wilk normality test (need 3 ≤ n ≤ 5000) + shapiro_p = None + if 3 <= n <= 5000: + _, shapiro_p = stats.shapiro(arr) + shapiro_p = float(shapiro_p) + + out_of_spec = sum(1 for v in values if v < lsl or v > usl) + + results.append( + SPCResult( + feature_name=name, n=n, mean=mean, std=std_overall, + min_val=float(np.min(arr)), max_val=float(np.max(arr)), + usl=usl, lsl=lsl, nominal=nominal, + cp=cp, cpk=cpk, pp=pp, ppk=ppk, + ucl=ucl, lcl=lcl, + out_of_spec_count=out_of_spec, + shapiro_p=shapiro_p, values=values, + ) + ) + return results + + +def _within_subgroup_sigma(arr: np.ndarray, subgroup_size: int) -> float | None: + """Estimate 
within-subgroup sigma using R-bar / d2 method.""" + n = len(arr) + if n < 2: + return None + + sg = min(subgroup_size, n) + d2 = _D2.get(sg) + if d2 is None: + # Fallback: use overall std if subgroup size not in d2 table + return float(np.std(arr, ddof=1)) + + ranges: list[float] = [] + for i in range(0, n - sg + 1, sg): + subgroup = arr[i : i + sg] + if len(subgroup) >= 2: + ranges.append(float(np.max(subgroup) - np.min(subgroup))) + + if not ranges: + return float(np.std(arr, ddof=1)) + + r_bar = float(np.mean(ranges)) + return r_bar / d2 diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..78bcfc9 --- /dev/null +++ b/app/config.py @@ -0,0 +1,14 @@ +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + azure_openai_endpoint: str = "" + azure_openai_api_key: str = "" + azure_openai_deployment: str = "gpt-4o" + azure_openai_api_version: str = "2024-10-21" + max_upload_mb: int = 50 + + model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} + + +settings = Settings() diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..ef2f2ae --- /dev/null +++ b/app/main.py @@ -0,0 +1,13 @@ +from pathlib import Path + +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles + +from app.routers import upload, results + +app = FastAPI(title="CMM Report Analyzer") + +app.include_router(upload.router, prefix="/api") +app.include_router(results.router, prefix="/api") + +app.mount("/", StaticFiles(directory=Path(__file__).parent / "static", html=True)) diff --git a/app/parsers/__init__.py b/app/parsers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/parsers/base.py b/app/parsers/base.py new file mode 100644 index 0000000..e6016df --- /dev/null +++ b/app/parsers/base.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import re +from abc import ABC, abstractmethod +from pathlib import Path + +from app.parsers.models import ParsedReport + +# Fuzzy column-name 
# Fuzzy column-name patterns → canonical field name.
# Checked in insertion order; the first pattern that matches wins.
COLUMN_PATTERNS: dict[str, list[re.Pattern[str]]] = {
    "feature_name": [
        re.compile(r"feat|char|dimen|label|id|name|item", re.I),
    ],
    "nominal": [
        re.compile(r"nom|target|blueprint|print", re.I),
    ],
    "tolerance_plus": [
        re.compile(r"tol.*\+|upper.*tol|\+.*tol|usl|dev.*\+|pos.*tol", re.I),
    ],
    "tolerance_minus": [
        re.compile(r"tol.*-|lower.*tol|-.*tol|lsl|dev.*-|neg.*tol", re.I),
    ],
    "actual": [
        re.compile(r"actual|meas|value|result|reading", re.I),
    ],
    "deviation": [
        re.compile(r"dev(?!.*tol)|diff|error|delta", re.I),
    ],
}


def match_column(header: str) -> str | None:
    """Return the canonical field name for a header string, or None."""
    header = header.strip()
    for field_name, patterns in COLUMN_PATTERNS.items():
        for pat in patterns:
            if pat.search(header):
                return field_name
    return None


class CMMParser(ABC):
    """Abstract base for format-specific CMM report parsers."""

    @abstractmethod
    def parse(self, path: Path) -> ParsedReport: ...


def get_parser(filename: str) -> CMMParser:
    """Select a parser by file extension.

    Raises:
        ValueError: when the extension is not a supported report format.
    """
    suffix = Path(filename).suffix.lower()
    if suffix == ".pdf":
        from app.parsers.pdf_parser import PDFParser
        return PDFParser()
    if suffix in (".xlsx", ".xls", ".csv"):
        from app.parsers.excel_parser import ExcelParser
        return ExcelParser()
    raise ValueError(f"Unsupported file type: {suffix}")


class ExcelParser(CMMParser):
    """Parser for tabular CMM reports (.csv, .xlsx, .xls)."""

    def parse(self, path: Path) -> ParsedReport:
        suffix = path.suffix.lower()
        if suffix == ".csv":
            df = pd.read_csv(path)
        elif suffix == ".xls":
            # BUGFIX: openpyxl cannot read legacy .xls files; let pandas
            # select the appropriate engine (xlrd) for this format.
            df = pd.read_excel(path)
        else:
            df = pd.read_excel(path, engine="openpyxl")

        col_map = self._map_columns(df.columns.tolist())
        measurements = self._extract(df, col_map)
        return ParsedReport(
            filename=path.name,
            measurements=measurements,
            metadata={"source": "excel", "rows": str(len(df))},
            raw_text=df.to_string(max_rows=200),
        )

    def _map_columns(self, headers: list[str]) -> dict[str, str]:
        """Map canonical field names to actual DataFrame column names."""
        mapping: dict[str, str] = {}
        for header in headers:
            canonical = match_column(str(header))
            # First matching header wins for each canonical field.
            if canonical and canonical not in mapping:
                mapping[canonical] = str(header)
        return mapping

    def _extract(
        self, df: pd.DataFrame, col_map: dict[str, str]
    ) -> list[MeasurementRecord]:
        """Build records from mapped columns; rows with bad values are skipped."""
        required = {"feature_name", "nominal", "actual"}
        if not required.issubset(col_map):
            return self._fallback_extract(df)

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = float(row[col_map["nominal"]])
                actual = float(row[col_map["actual"]])
                tol_plus = (
                    float(row[col_map["tolerance_plus"]])
                    if "tolerance_plus" in col_map
                    else 0.0
                )
                tol_minus = (
                    float(row[col_map["tolerance_minus"]])
                    if "tolerance_minus" in col_map
                    else 0.0
                )
                deviation = (
                    float(row[col_map["deviation"]])
                    if "deviation" in col_map
                    else actual - nominal
                )
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[col_map["feature_name"]]),
                        nominal=nominal,
                        # Normalize signs: plus-tolerance positive,
                        # minus-tolerance negative, whatever the sheet used.
                        tolerance_plus=abs(tol_plus),
                        tolerance_minus=-abs(tol_minus),
                        actual=actual,
                        deviation=deviation,
                    )
                )
            except (ValueError, TypeError):
                # Non-numeric cell (header repeats, blank rows): skip the row.
                continue
        return records

    def _fallback_extract(self, df: pd.DataFrame) -> list[MeasurementRecord]:
        """Best-effort extraction when column mapping is incomplete.

        Treats the first string column as the feature name and the first
        three numeric columns as nominal, actual, tolerance_plus (with
        tolerance_minus mirrored).
        """
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        str_cols = df.select_dtypes(include="object").columns.tolist()
        if len(numeric_cols) < 2 or not str_cols:
            return []

        name_col = str_cols[0]
        nom_col = numeric_cols[0]
        act_col = numeric_cols[1]
        tol_col = numeric_cols[2] if len(numeric_cols) > 2 else None

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = float(row[nom_col])
                actual = float(row[act_col])
                tol = float(row[tol_col]) if tol_col else 0.0
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[name_col]),
                        nominal=nominal,
                        tolerance_plus=abs(tol),
                        tolerance_minus=-abs(tol),
                        actual=actual,
                        deviation=actual - nominal,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records


@dataclass
class MeasurementRecord:
    """One measured feature: nominal, signed tolerances, and actual value."""

    feature_name: str
    nominal: float
    tolerance_plus: float    # stored non-negative
    tolerance_minus: float   # stored non-positive
    actual: float
    deviation: float = 0.0
    unit: str = "mm"

    @property
    def usl(self) -> float:
        """Upper spec limit: nominal + plus-tolerance."""
        return self.nominal + self.tolerance_plus

    @property
    def lsl(self) -> float:
        """Lower spec limit: nominal + minus-tolerance (tolerance_minus ≤ 0)."""
        return self.nominal + self.tolerance_minus

    @property
    def in_tolerance(self) -> bool:
        """True when the actual value lies within [LSL, USL] inclusive."""
        return self.lsl <= self.actual <= self.usl

    def to_dict(self) -> dict:
        return {
            "feature_name": self.feature_name,
            "nominal": self.nominal,
            "tolerance_plus": self.tolerance_plus,
            "tolerance_minus": self.tolerance_minus,
            "actual": self.actual,
            "deviation": self.deviation,
            "unit": self.unit,
            "usl": self.usl,
            "lsl": self.lsl,
            "in_tolerance": self.in_tolerance,
        }
@dataclass
class ParsedReport:
    """One parsed CMM report: measurement records plus document metadata."""

    filename: str
    measurements: list[MeasurementRecord] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)
    raw_text: str = ""  # truncated source text, used as AI-prompt context

    @property
    def out_of_tolerance(self) -> list[MeasurementRecord]:
        """Measurements whose actual value falls outside [LSL, USL]."""
        return [m for m in self.measurements if not m.in_tolerance]

    def to_dict(self) -> dict:
        return {
            "filename": self.filename,
            "metadata": self.metadata,
            "measurement_count": len(self.measurements),
            "out_of_tolerance_count": len(self.out_of_tolerance),
            "measurements": [m.to_dict() for m in self.measurements],
        }


class PDFParser(CMMParser):
    """Parser for PDF CMM reports: tables via pdfplumber, metadata via regex."""

    def parse(self, path: Path) -> ParsedReport:
        text_parts: list[str] = []
        all_rows: list[dict[str, str | None]] = []
        headers: list[str] = []

        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text() or ""
                text_parts.append(page_text)

                for table in page.extract_tables():
                    if not table or not table[0]:
                        continue
                    # First table's first row defines the headers; later
                    # tables are assumed to continue the same layout.
                    if not headers:
                        headers = [str(c or "").strip() for c in table[0]]
                        data_rows = table[1:]
                    else:
                        data_rows = table
                    for row in data_rows:
                        if row and any(cell for cell in row):
                            all_rows.append(
                                {
                                    headers[i]: (str(cell).strip() if cell else None)
                                    for i, cell in enumerate(row)
                                    if i < len(headers)
                                }
                            )

        raw_text = "\n".join(text_parts)
        col_map = {match_column(h): h for h in headers if match_column(h)}
        measurements = self._extract(all_rows, col_map)
        metadata = self._extract_metadata(raw_text)
        metadata["source"] = "pdf"

        return ParsedReport(
            filename=path.name,
            measurements=measurements,
            metadata=metadata,
            raw_text=raw_text[:10_000],  # cap prompt/context size
        )

    def _extract(
        self,
        rows: list[dict[str, str | None]],
        col_map: dict[str | None, str],
    ) -> list[MeasurementRecord]:
        """Build records from mapped table rows; unparseable rows are skipped."""
        required = {"feature_name", "nominal", "actual"}
        if not required.issubset(col_map):
            return self._fallback_extract(rows)

        records: list[MeasurementRecord] = []
        for row in rows:
            try:
                name = row.get(col_map["feature_name"]) or ""
                nominal = _to_float(row.get(col_map["nominal"]))
                actual = _to_float(row.get(col_map["actual"]))
                if nominal is None or actual is None or not name:
                    continue
                # BUGFIX: the original used `_to_float(...) or fallback`,
                # which silently replaced a genuine 0.0 cell value with the
                # fallback (0.0 is falsy). Use explicit None checks instead.
                tol_plus = _to_float(row.get(col_map.get("tolerance_plus", ""), ""))
                tol_plus = 0.0 if tol_plus is None else tol_plus
                tol_minus = _to_float(row.get(col_map.get("tolerance_minus", ""), ""))
                tol_minus = 0.0 if tol_minus is None else tol_minus
                dev_raw = _to_float(row.get(col_map.get("deviation", ""), ""))
                deviation = actual - nominal if dev_raw is None else dev_raw
                records.append(
                    MeasurementRecord(
                        feature_name=name,
                        nominal=nominal,
                        tolerance_plus=abs(tol_plus),
                        tolerance_minus=-abs(tol_minus),
                        actual=actual,
                        deviation=deviation,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records

    def _fallback_extract(
        self, rows: list[dict[str, str | None]]
    ) -> list[MeasurementRecord]:
        """Try to extract from rows even without full column mapping."""
        if not rows:
            return []
        headers = list(rows[0].keys())
        # Heuristic: first string-looking column = name, then numeric columns.
        numeric_cols: list[str] = []
        name_col: str | None = None
        for h in headers:
            sample_vals = [r.get(h) for r in rows[:5] if r.get(h)]
            if sample_vals and all(_to_float(v) is not None for v in sample_vals):
                numeric_cols.append(h)
            elif name_col is None and sample_vals:
                name_col = h
        if not name_col or len(numeric_cols) < 2:
            return []

        records: list[MeasurementRecord] = []
        for row in rows:
            try:
                name = row.get(name_col) or ""
                nominal = _to_float(row.get(numeric_cols[0]))
                actual = _to_float(row.get(numeric_cols[1]))
                if nominal is None or actual is None or not name:
                    continue
                tol = _to_float(row.get(numeric_cols[2])) if len(numeric_cols) > 2 else 0.0
                tol = 0.0 if tol is None else tol
                records.append(
                    MeasurementRecord(
                        feature_name=name,
                        nominal=nominal,
                        tolerance_plus=abs(tol),
                        tolerance_minus=-abs(tol),
                        actual=actual,
                        deviation=actual - nominal,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records

    def _extract_metadata(self, text: str) -> dict[str, str]:
        """Scrape common report-header fields (part/serial/date/...) via regex."""
        metadata: dict[str, str] = {}
        import re

        for pattern, key in [
            (r"(?i)part\s*(?:no|number|#|:)\s*[:\s]*(\S+)", "part_number"),
            (r"(?i)serial\s*(?:no|number|#|:)\s*[:\s]*(\S+)", "serial_number"),
            (r"(?i)date\s*[:\s]+(\d[\d/\-\.]+\d)", "inspection_date"),
            (r"(?i)program\s*[:\s]+(.+?)(?:\n|$)", "program"),
            (r"(?i)operator\s*[:\s]+(.+?)(?:\n|$)", "operator"),
        ]:
            m = re.search(pattern, text)
            if m:
                metadata[key] = m.group(1).strip()
        return metadata


def _to_float(val: str | None) -> float | None:
    """Parse a table cell to float; tolerates thousands commas. None on failure."""
    if val is None:
        return None
    val = val.strip().replace(",", "")
    try:
        return float(val)
    except ValueError:
        return None


router = APIRouter()


@router.get("/results/{batch_id}")
async def get_results(batch_id: str):
    """Return the processed results for a batch; 404 for unknown ids."""
    batch = get_batch(batch_id)
    if batch is None:
        raise HTTPException(404, "Batch not found")
    return batch.to_dict()
router = APIRouter()

ALLOWED_EXTENSIONS = {".pdf", ".xlsx", ".xls", ".csv"}


@router.post("/upload")
async def upload_files(files: list[UploadFile]):
    """Accept report uploads, validate type and size, stage to a temp dir,
    and kick off batch processing.

    Raises:
        HTTPException(400): no files, unsupported extension, oversize file,
            or nothing usable after filtering.
    """
    if not files:
        raise HTTPException(400, "No files provided")

    saved: list[Path] = []
    tmp_dir = Path(tempfile.mkdtemp(prefix="cmm_"))

    for f in files:
        if not f.filename:
            continue
        # SECURITY: keep only the basename — a crafted client filename such
        # as "../../etc/x.csv" must not escape the staging directory
        # (path traversal). The original wrote tmp_dir / f.filename directly.
        safe_name = Path(f.filename).name
        ext = Path(safe_name).suffix.lower()
        if ext not in ALLOWED_EXTENSIONS:
            raise HTTPException(
                400, f"Unsupported file type: {ext}. Allowed: {ALLOWED_EXTENSIONS}"
            )
        size = 0
        dest = tmp_dir / safe_name
        with open(dest, "wb") as out:
            # Stream in 64 KiB chunks so a huge upload never sits in memory.
            while chunk := await f.read(1024 * 64):
                size += len(chunk)
                if size > settings.max_upload_mb * 1024 * 1024:
                    out.close()
                    dest.unlink(missing_ok=True)  # drop the partial file
                    raise HTTPException(
                        400, f"File too large (max {settings.max_upload_mb} MB)"
                    )
                out.write(chunk)
        saved.append(dest)

    if not saved:
        raise HTTPException(400, "No valid files uploaded")

    batch_id = await process_batch(saved)
    return {"batch_id": batch_id, "file_count": len(saved)}
"spc": self.spc, + "charts": self.charts, + "summary": self.summary, + "error": self.error, + } + + +@dataclass +class BatchResult: + batch_id: str + status: str = "processing" + files: list[FileResult] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "batch_id": self.batch_id, + "status": self.status, + "files": [f.to_dict() for f in self.files], + } + + +async def process_batch(file_paths: list[Path]) -> str: + """Process a batch of files and return the batch_id.""" + batch_id = uuid.uuid4().hex[:12] + batch = BatchResult(batch_id=batch_id) + _store[batch_id] = batch + + tasks = [_process_single(path) for path in file_paths] + results = await asyncio.gather(*tasks, return_exceptions=True) + + for result in results: + if isinstance(result, Exception): + logger.error("File processing failed: %s", result) + batch.files.append( + FileResult( + filename="unknown", + report={}, spc=[], charts={}, summary="", + error=str(result), + ) + ) + else: + batch.files.append(result) + + batch.status = "complete" + return batch_id + + +async def _process_single(path: Path) -> FileResult: + """Parse → SPC → Charts → AI summary for a single file.""" + try: + parser = get_parser(path.name) + report: ParsedReport = parser.parse(path) + + spc_results: list[SPCResult] = calculate_spc(report.measurements) + charts = generate_charts(spc_results) + summary = await summarize(report, spc_results) + + return FileResult( + filename=report.filename, + report=report.to_dict(), + spc=[s.to_dict() for s in spc_results], + charts=charts, + summary=summary, + ) + except Exception as exc: + logger.exception("Error processing %s", path.name) + return FileResult( + filename=path.name, + report={}, spc=[], charts={}, summary="", + error=str(exc), + ) + + +def get_batch(batch_id: str) -> BatchResult | None: + return _store.get(batch_id) diff --git a/app/static/css/style.css b/app/static/css/style.css new file mode 100644 index 0000000..650285d --- /dev/null +++ 
b/app/static/css/style.css @@ -0,0 +1,247 @@ +:root { + --bg: #f4f6f8; + --surface: #ffffff; + --primary: #1a5276; + --primary-light: #2980b9; + --accent: #2ecc71; + --danger: #e74c3c; + --warn: #f39c12; + --text: #2c3e50; + --text-muted: #7f8c8d; + --border: #dce1e6; + --radius: 8px; +} + +*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; + background: var(--bg); + color: var(--text); + line-height: 1.6; +} + +header { + background: var(--primary); + color: #fff; + padding: 1.5rem 2rem; +} + +header h1 { + font-size: 1.5rem; + font-weight: 600; + letter-spacing: 0.02em; +} + +.subtitle { + color: rgba(255, 255, 255, 0.7); + font-size: 0.9rem; + margin-top: 0.25rem; +} + +main { + max-width: 1200px; + margin: 0 auto; + padding: 2rem 1.5rem; +} + +/* Drop zone */ +#drop-zone { + border: 2px dashed var(--border); + border-radius: var(--radius); + padding: 3rem 2rem; + text-align: center; + cursor: pointer; + transition: border-color 0.2s, background 0.2s; + background: var(--surface); +} + +#drop-zone:hover, #drop-zone.dragover { + border-color: var(--primary-light); + background: rgba(41, 128, 185, 0.04); +} + +.drop-content svg { color: var(--text-muted); margin-bottom: 0.75rem; } +.drop-content p { color: var(--text); font-size: 1rem; } +.drop-content .hint { color: var(--text-muted); font-size: 0.85rem; margin-top: 0.25rem; } + +/* File list */ +#file-list { + margin-top: 1rem; + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.file-tag { + background: var(--primary); + color: #fff; + padding: 0.3rem 0.75rem; + border-radius: 999px; + font-size: 0.85rem; + display: inline-flex; + align-items: center; + gap: 0.4rem; +} + +.file-tag .remove { + cursor: pointer; + opacity: 0.7; + font-weight: bold; +} + +.file-tag .remove:hover { opacity: 1; } + +/* Upload button */ +#upload-btn { + margin-top: 1rem; + padding: 0.6rem 
2rem; + background: var(--primary); + color: #fff; + border: none; + border-radius: var(--radius); + font-size: 1rem; + cursor: pointer; + transition: background 0.2s; +} + +#upload-btn:hover:not(:disabled) { background: var(--primary-light); } +#upload-btn:disabled { opacity: 0.5; cursor: not-allowed; } + +/* Spinner */ +#status-section { + text-align: center; + padding: 3rem 0; +} + +.spinner { + width: 40px; + height: 40px; + border: 4px solid var(--border); + border-top-color: var(--primary); + border-radius: 50%; + margin: 0 auto 1rem; + animation: spin 0.8s linear infinite; +} + +@keyframes spin { to { transform: rotate(360deg); } } + +/* Results */ +.file-result { + background: var(--surface); + border-radius: var(--radius); + padding: 1.5rem; + margin-bottom: 1.5rem; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08); +} + +.file-result h2 { + font-size: 1.2rem; + color: var(--primary); + margin-bottom: 1rem; + padding-bottom: 0.5rem; + border-bottom: 2px solid var(--border); +} + +.file-result .error { + color: var(--danger); + font-weight: 600; +} + +/* SPC table */ +.spc-table { + width: 100%; + border-collapse: collapse; + font-size: 0.85rem; + margin-bottom: 1.5rem; + overflow-x: auto; + display: block; +} + +.spc-table th, .spc-table td { + padding: 0.5rem 0.75rem; + text-align: right; + border-bottom: 1px solid var(--border); + white-space: nowrap; +} + +.spc-table th { + background: var(--bg); + font-weight: 600; + text-align: right; + position: sticky; + top: 0; +} + +.spc-table th:first-child, .spc-table td:first-child { text-align: left; } + +.spc-table tr:hover td { background: rgba(41, 128, 185, 0.04); } + +.cpk-good { color: var(--accent); font-weight: 600; } +.cpk-warn { color: var(--warn); font-weight: 600; } +.cpk-bad { color: var(--danger); font-weight: 600; } + +/* Summary */ +.summary { + background: var(--bg); + border-left: 4px solid var(--primary); + padding: 1rem 1.25rem; + margin-bottom: 1.5rem; + border-radius: 0 var(--radius) 
var(--radius) 0; + white-space: pre-wrap; + font-size: 0.9rem; + line-height: 1.7; +} + +/* Charts */ +.charts-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 1rem; + margin-bottom: 1.5rem; +} + +.chart-container { + background: var(--surface); + border: 1px solid var(--border); + border-radius: var(--radius); + overflow: hidden; +} + +.chart-full { grid-column: 1 / -1; } + +/* Measurements table */ +.meas-toggle { + background: none; + border: 1px solid var(--border); + padding: 0.4rem 1rem; + border-radius: var(--radius); + cursor: pointer; + font-size: 0.85rem; + color: var(--primary); + margin-bottom: 1rem; +} + +.meas-toggle:hover { background: var(--bg); } + +.meas-table { + width: 100%; + border-collapse: collapse; + font-size: 0.8rem; +} + +.meas-table th, .meas-table td { + padding: 0.4rem 0.6rem; + border-bottom: 1px solid var(--border); + text-align: right; +} + +.meas-table th { background: var(--bg); font-weight: 600; } +.meas-table th:first-child, .meas-table td:first-child { text-align: left; } + +.meas-table .oot { background: rgba(231, 76, 60, 0.08); color: var(--danger); } + +@media (max-width: 768px) { + .charts-grid { grid-template-columns: 1fr; } + main { padding: 1rem; } +} diff --git a/app/static/index.html b/app/static/index.html new file mode 100644 index 0000000..229201b --- /dev/null +++ b/app/static/index.html @@ -0,0 +1,49 @@ + + + + + + CMM Report Analyzer + + + + +
+

CMM Report Analyzer

+

Upload CMM inspection reports for SPC analysis and AI-powered summaries

+
+ +
+ +
+
+
+ + + + + +

Drag & drop PDF or Excel files here

+

or click to browse — .pdf, .xlsx, .xls, .csv

+
+ +
+
+ +
+ + + + + + +
+ + + + diff --git a/app/static/js/app.js b/app/static/js/app.js new file mode 100644 index 0000000..52d24c1 --- /dev/null +++ b/app/static/js/app.js @@ -0,0 +1,204 @@ +(() => { + const dropZone = document.getElementById("drop-zone"); + const fileInput = document.getElementById("file-input"); + const fileList = document.getElementById("file-list"); + const uploadBtn = document.getElementById("upload-btn"); + const uploadSection = document.getElementById("upload-section"); + const statusSection = document.getElementById("status-section"); + const statusText = document.getElementById("status-text"); + const resultsSection = document.getElementById("results-section"); + const resultsContainer = document.getElementById("results-container"); + + let selectedFiles = []; + + // Drag & drop + dropZone.addEventListener("click", () => fileInput.click()); + dropZone.addEventListener("dragover", (e) => { e.preventDefault(); dropZone.classList.add("dragover"); }); + dropZone.addEventListener("dragleave", () => dropZone.classList.remove("dragover")); + dropZone.addEventListener("drop", (e) => { + e.preventDefault(); + dropZone.classList.remove("dragover"); + addFiles(e.dataTransfer.files); + }); + fileInput.addEventListener("change", () => addFiles(fileInput.files)); + + function addFiles(files) { + for (const f of files) { + if (!selectedFiles.some((s) => s.name === f.name && s.size === f.size)) { + selectedFiles.push(f); + } + } + renderFileList(); + } + + function renderFileList() { + fileList.innerHTML = ""; + selectedFiles.forEach((f, i) => { + const tag = document.createElement("span"); + tag.className = "file-tag"; + tag.innerHTML = `${f.name} ×`; + fileList.appendChild(tag); + }); + fileList.querySelectorAll(".remove").forEach((btn) => + btn.addEventListener("click", (e) => { + selectedFiles.splice(+e.target.dataset.idx, 1); + renderFileList(); + }) + ); + uploadBtn.disabled = selectedFiles.length === 0; + } + + // Upload + uploadBtn.addEventListener("click", async () => 
{ + if (!selectedFiles.length) return; + + uploadSection.hidden = true; + statusSection.hidden = false; + resultsSection.hidden = true; + statusText.textContent = `Uploading ${selectedFiles.length} file(s)...`; + + const form = new FormData(); + selectedFiles.forEach((f) => form.append("files", f)); + + try { + const resp = await fetch("/api/upload", { method: "POST", body: form }); + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: resp.statusText })); + throw new Error(err.detail || "Upload failed"); + } + const { batch_id } = await resp.json(); + statusText.textContent = "Analyzing..."; + await pollResults(batch_id); + } catch (err) { + statusSection.hidden = true; + uploadSection.hidden = false; + alert("Error: " + err.message); + } + }); + + async function pollResults(batchId) { + const maxAttempts = 60; + for (let i = 0; i < maxAttempts; i++) { + const resp = await fetch(`/api/results/${batchId}`); + if (!resp.ok) { await sleep(1000); continue; } + const data = await resp.json(); + if (data.status === "complete") { + renderResults(data); + return; + } + statusText.textContent = `Analyzing... (${i + 1}s)`; + await sleep(1000); + } + statusSection.hidden = true; + uploadSection.hidden = false; + alert("Timed out waiting for results"); + } + + function sleep(ms) { return new Promise((r) => setTimeout(r, ms)); } + + // Render + function renderResults(data) { + statusSection.hidden = true; + resultsSection.hidden = false; + resultsContainer.innerHTML = ""; + + for (const file of data.files) { + const div = document.createElement("div"); + div.className = "file-result"; + + if (file.error) { + div.innerHTML = `

${esc(file.filename)}

Error: ${esc(file.error)}

`; + resultsContainer.appendChild(div); + continue; + } + + let html = `

${esc(file.filename)}

`; + + // AI Summary + html += `
${esc(file.summary)}
`; + + // SPC table + if (file.spc.length) { + html += ` + + + + `; + for (const s of file.spc) { + const cpkClass = s.cpk === null ? "" : s.cpk >= 1.33 ? "cpk-good" : s.cpk >= 1.0 ? "cpk-warn" : "cpk-bad"; + html += ` + + + + + + `; + } + html += `
FeaturenMeanStdCpCpkPpPpkUSLLSLOOS
${esc(s.feature_name)}${s.n}${s.mean}${s.std}${fmtIdx(s.cp)}${fmtIdx(s.cpk)}${fmtIdx(s.pp)}${fmtIdx(s.ppk)}${s.usl}${s.lsl}${s.out_of_spec_count}
`; + } + + // Charts + html += `
`; + const histDivs = (file.charts.histograms || []).map((_, i) => `hist-${data.batch_id}-${file.filename}-${i}`); + const ctrlDivs = (file.charts.control_charts || []).map((_, i) => `ctrl-${data.batch_id}-${file.filename}-${i}`); + const capDiv = file.charts.capability_bar ? `cap-${data.batch_id}-${file.filename}` : null; + + histDivs.forEach((id) => { html += `
`; }); + ctrlDivs.forEach((id) => { html += `
`; }); + if (capDiv) html += `
`; + html += `
`; + + // Measurements toggle + if (file.report.measurements && file.report.measurements.length) { + const tableId = `meas-${data.batch_id}-${file.filename}`; + html += ``; + html += ``; + } + + div.innerHTML = html; + resultsContainer.appendChild(div); + + // Render Plotly charts after DOM insertion + requestAnimationFrame(() => { + (file.charts.histograms || []).forEach((chart, i) => { + Plotly.newPlot(histDivs[i], chart.data, { ...chart.layout, autosize: true }, { responsive: true }); + }); + (file.charts.control_charts || []).forEach((chart, i) => { + Plotly.newPlot(ctrlDivs[i], chart.data, { ...chart.layout, autosize: true }, { responsive: true }); + }); + if (capDiv && file.charts.capability_bar) { + const cap = file.charts.capability_bar; + Plotly.newPlot(capDiv, cap.data, { ...cap.layout, autosize: true }, { responsive: true }); + } + }); + } + + // Reset for new uploads + selectedFiles = []; + renderFileList(); + uploadSection.hidden = false; + } + + function esc(s) { + const d = document.createElement("div"); + d.textContent = s || ""; + return d.innerHTML; + } + + function fmtIdx(v) { return v === null || v === undefined ? 
"N/A" : v.toFixed(3); } +})(); diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0692bb7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[project] +name = "cmm-report-analyzer" +version = "0.1.0" +description = "CMM inspection report analyzer with SPC metrics and AI summaries" +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.115", + "uvicorn[standard]>=0.32", + "pdfplumber>=0.11", + "pandas>=2.2", + "openpyxl>=3.1", + "numpy>=2.0", + "scipy>=1.14", + "plotly>=5.24", + "openai>=1.50", + "pydantic-settings>=2.6", + "python-multipart>=0.0.12", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0", + "pytest-asyncio>=0.24", + "httpx>=0.27", +] + +[tool.pytest.ini_options] +asyncio_mode = "auto" + +[build-system] +requires = ["setuptools>=75"] +build-backend = "setuptools.build_meta" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..977265c --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,72 @@ +import io +import tempfile +from pathlib import Path + +import pandas as pd +import pytest +from httpx import ASGITransport, AsyncClient + +from app.main import app + + +@pytest.fixture +def sample_excel() -> bytes: + df = pd.DataFrame({ + "Feature Name": ["D1", "D1", "D1", "D2", "D2", "D2"], + "Nominal": [10.0, 10.0, 10.0, 20.0, 20.0, 20.0], + "Actual": [10.02, 9.99, 10.01, 20.05, 19.97, 20.02], + "Tol+": [0.05, 0.05, 0.05, 0.10, 0.10, 0.10], + "Tol-": [-0.05, -0.05, -0.05, -0.10, -0.10, -0.10], + }) + buf = io.BytesIO() + df.to_excel(buf, index=False) + buf.seek(0) + return buf.read() + + +@pytest.mark.asyncio +async def test_upload_and_results(sample_excel): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post( + "/api/upload", + files=[("files", ("test.xlsx", sample_excel, 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"))], + ) + assert resp.status_code == 200 + data = resp.json() + assert "batch_id" in data + + resp2 = await client.get(f"/api/results/{data['batch_id']}") + assert resp2.status_code == 200 + result = resp2.json() + assert result["status"] == "complete" + assert len(result["files"]) == 1 + assert result["files"][0]["filename"] == "test.xlsx" + assert len(result["files"][0]["spc"]) == 2 # D1 and D2 + + +@pytest.mark.asyncio +async def test_upload_no_files(): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post("/api/upload", files=[]) + assert resp.status_code in (400, 422) + + +@pytest.mark.asyncio +async def test_upload_unsupported_type(): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post( + "/api/upload", + files=[("files", ("test.png", b"fake", "image/png"))], + ) + assert resp.status_code == 400 + + +@pytest.mark.asyncio +async def test_results_not_found(): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.get("/api/results/nonexistent") + assert resp.status_code == 404 diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 0000000..f988cb8 --- /dev/null +++ b/tests/test_parsers.py @@ -0,0 +1,90 @@ +import tempfile +from pathlib import Path + +import pandas as pd + +from app.parsers.base import get_parser, match_column +from app.parsers.excel_parser import ExcelParser +from app.parsers.models import MeasurementRecord + + +def test_match_column_nominal(): + assert match_column("Nominal") == "nominal" + assert match_column("NOM") == "nominal" + assert match_column("Target Value") == "nominal" + + +def test_match_column_actual(): + assert match_column("Actual") == "actual" + assert 
match_column("Measured") == "actual" + + +def test_match_column_unknown(): + assert match_column("random_xyz") is None + + +def test_get_parser_pdf(): + p = get_parser("report.pdf") + from app.parsers.pdf_parser import PDFParser + assert isinstance(p, PDFParser) + + +def test_get_parser_excel(): + p = get_parser("data.xlsx") + assert isinstance(p, ExcelParser) + + +def test_get_parser_csv(): + p = get_parser("data.csv") + assert isinstance(p, ExcelParser) + + +def test_get_parser_unsupported(): + try: + get_parser("image.png") + assert False, "Should have raised" + except ValueError: + pass + + +def test_measurement_record_properties(): + rec = MeasurementRecord( + feature_name="D1", + nominal=10.0, + tolerance_plus=0.05, + tolerance_minus=-0.05, + actual=10.02, + ) + assert rec.usl == 10.05 + assert rec.lsl == 9.95 + assert rec.in_tolerance is True + + +def test_measurement_record_out_of_tolerance(): + rec = MeasurementRecord( + feature_name="D1", + nominal=10.0, + tolerance_plus=0.05, + tolerance_minus=-0.05, + actual=10.10, + ) + assert rec.in_tolerance is False + + +def test_excel_parser_with_standard_headers(): + df = pd.DataFrame({ + "Feature Name": ["D1", "D2", "D3"], + "Nominal": [10.0, 20.0, 30.0], + "Actual": [10.02, 19.98, 30.05], + "Tol+": [0.05, 0.10, 0.10], + "Tol-": [-0.05, -0.10, -0.10], + }) + with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f: + df.to_excel(f.name, index=False) + parser = ExcelParser() + report = parser.parse(Path(f.name)) + + assert len(report.measurements) == 3 + assert report.measurements[0].feature_name == "D1" + assert report.measurements[0].nominal == 10.0 + assert report.measurements[0].actual == 10.02 diff --git a/tests/test_spc.py b/tests/test_spc.py new file mode 100644 index 0000000..1998c94 --- /dev/null +++ b/tests/test_spc.py @@ -0,0 +1,84 @@ +from app.analysis.spc import SPCResult, calculate_spc +from app.parsers.models import MeasurementRecord + + +def _make_records(name: str, nominal: float, tol: 
float, actuals: list[float]): + return [ + MeasurementRecord( + feature_name=name, + nominal=nominal, + tolerance_plus=tol, + tolerance_minus=-tol, + actual=a, + deviation=a - nominal, + ) + for a in actuals + ] + + +def test_single_measurement_returns_none_indices(): + records = _make_records("D1", 10.0, 0.05, [10.01]) + results = calculate_spc(records) + assert len(results) == 1 + r = results[0] + assert r.cp is None + assert r.cpk is None + assert r.pp is None + assert r.ppk is None + + +def test_basic_spc_calculation(): + actuals = [10.01, 10.02, 9.99, 10.00, 10.03, 9.98, 10.01, 10.02, 9.99, 10.00] + records = _make_records("D1", 10.0, 0.05, actuals) + results = calculate_spc(records) + + assert len(results) == 1 + r = results[0] + assert r.n == 10 + assert r.pp is not None + assert r.ppk is not None + assert r.cp is not None + assert r.cpk is not None + assert r.pp > 0 + assert r.cpk > 0 + assert r.out_of_spec_count == 0 + + +def test_out_of_spec_count(): + actuals = [10.0, 10.06, 9.94, 10.0, 10.0] # 10.06 and 9.94 outside ±0.05 + records = _make_records("D1", 10.0, 0.05, actuals) + results = calculate_spc(records) + assert results[0].out_of_spec_count == 2 + + +def test_multiple_features(): + records = ( + _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99]) + + _make_records("D2", 20.0, 0.10, [20.05, 19.95, 20.01]) + ) + results = calculate_spc(records) + assert len(results) == 2 + names = {r.feature_name for r in results} + assert names == {"D1", "D2"} + + +def test_shapiro_not_computed_for_small_n(): + records = _make_records("D1", 10.0, 0.05, [10.01, 10.02]) + results = calculate_spc(records) + assert results[0].shapiro_p is None + + +def test_shapiro_computed_for_n_ge_3(): + records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99]) + results = calculate_spc(records) + assert results[0].shapiro_p is not None + + +def test_to_dict(): + records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99, 10.00, 10.03]) + results = calculate_spc(records) + 
d = results[0].to_dict() + assert "feature_name" in d + assert "cpk" in d + assert "values" in d + assert isinstance(d["values"], list)