commit 9abf9b4b58fe7a50d2fbcc2a18567d4af0c9094d Author: chrisryn Date: Thu Feb 19 10:38:51 2026 -0600 Initial commit: CMM Report Analyzer FastAPI app that parses CMM inspection reports (PDF/Excel/CSV), computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk), generates interactive Plotly charts, and provides AI-powered quality summaries via Azure OpenAI with graceful fallback. Includes 21 passing tests covering parsers, SPC calculations, and API endpoints. diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d531d44 --- /dev/null +++ b/.env.example @@ -0,0 +1,4 @@ +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com +AZURE_OPENAI_API_KEY=your-key-here +AZURE_OPENAI_DEPLOYMENT=gpt-4o +AZURE_OPENAI_API_VERSION=2024-10-21 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60ee068 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +__pycache__/ +*.py[cod] +*.egg-info/ +dist/ +build/ +.env +.venv/ +venv/ +*.tmp +uploads/ diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/ai/__init__.py b/app/ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/ai/summarizer.py b/app/ai/summarizer.py new file mode 100644 index 0000000..3464804 --- /dev/null +++ b/app/ai/summarizer.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import logging + +from openai import AsyncAzureOpenAI + +from app.analysis.spc import SPCResult +from app.config import settings +from app.parsers.models import ParsedReport + +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT = """\ +You are a quality engineer reviewing CMM (Coordinate Measuring Machine) inspection data. +Provide a concise, actionable summary that includes: +1. Overall pass/fail assessment +2. Features of concern (low Cpk, out-of-tolerance, trends) +3. Root-cause hypotheses for any deviations +4. Recommended corrective actions +Use precise engineering language. 
Reference feature names and numeric values.""" + + +async def summarize( + report: ParsedReport, spc_results: list[SPCResult] +) -> str: + """Generate an AI-powered quality summary. Returns fallback text on failure.""" + if not settings.azure_openai_endpoint or not settings.azure_openai_api_key: + return _fallback_summary(report, spc_results) + + spc_text = _format_spc(spc_results) + user_msg = ( + f"File: {report.filename}\n" + f"Measurements: {len(report.measurements)}, " + f"Out of tolerance: {len(report.out_of_tolerance)}\n\n" + f"SPC Results:\n{spc_text}\n\n" + f"Raw report excerpt:\n{report.raw_text[:3000]}" + ) + + try: + client = AsyncAzureOpenAI( + azure_endpoint=settings.azure_openai_endpoint, + api_key=settings.azure_openai_api_key, + api_version=settings.azure_openai_api_version, + ) + response = await client.chat.completions.create( + model=settings.azure_openai_deployment, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": user_msg}, + ], + temperature=0.3, + max_tokens=1024, + ) + return response.choices[0].message.content or _fallback_summary( + report, spc_results + ) + except Exception: + logger.exception("Azure OpenAI call failed, using fallback summary") + return _fallback_summary(report, spc_results) + + +def _format_spc(results: list[SPCResult]) -> str: + lines: list[str] = [] + for r in results: + cpk_str = f"{r.cpk:.3f}" if r.cpk is not None else "N/A" + ppk_str = f"{r.ppk:.3f}" if r.ppk is not None else "N/A" + lines.append( + f" {r.feature_name}: n={r.n}, mean={r.mean:.4f}, " + f"Cpk={cpk_str}, Ppk={ppk_str}, OOS={r.out_of_spec_count}" + ) + return "\n".join(lines) if lines else " No SPC data available." 
+ + +def _fallback_summary(report: ParsedReport, spc_results: list[SPCResult]) -> str: + total = len(report.measurements) + oot = len(report.out_of_tolerance) + status = "PASS" if oot == 0 else "FAIL" + + lines = [ + f"**Inspection Summary for {report.filename}**", + f"Status: **{status}** — {total} measurements, {oot} out of tolerance.", + "", + ] + if oot > 0: + lines.append("Out-of-tolerance features:") + for m in report.out_of_tolerance: + lines.append( + f" - {m.feature_name}: actual={m.actual:.4f}, " + f"nominal={m.nominal:.4f}, tolerance=[{m.lsl:.4f}, {m.usl:.4f}]" + ) + lines.append("") + + for r in spc_results: + if r.cpk is not None and r.cpk < 1.0: + lines.append( + f" Warning: {r.feature_name} Cpk={r.cpk:.3f} (below 1.0)" + ) + + lines.append("") + lines.append("*(AI summary unavailable — configure Azure OpenAI for enhanced analysis)*") + return "\n".join(lines) diff --git a/app/analysis/__init__.py b/app/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/analysis/charts.py b/app/analysis/charts.py new file mode 100644 index 0000000..dcccde7 --- /dev/null +++ b/app/analysis/charts.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import plotly.graph_objects as go + +from app.analysis.spc import SPCResult + + +def histogram(result: SPCResult) -> dict: + """Distribution histogram with spec limits overlay.""" + fig = go.Figure() + fig.add_trace(go.Histogram(x=result.values, name="Measurements", nbinsx=20)) + fig.add_vline(x=result.usl, line_dash="dash", line_color="red", + annotation_text="USL") + fig.add_vline(x=result.lsl, line_dash="dash", line_color="red", + annotation_text="LSL") + fig.add_vline(x=result.nominal, line_dash="dot", line_color="green", + annotation_text="Nominal") + fig.update_layout( + title=f"Distribution – {result.feature_name}", + xaxis_title="Value", yaxis_title="Count", + template="plotly_white", height=350, + ) + return fig.to_plotly_json() + + +def control_chart(result: SPCResult) -> dict: 
+ """Individual values control chart (I-chart).""" + x_axis = list(range(1, result.n + 1)) + fig = go.Figure() + fig.add_trace(go.Scatter( + x=x_axis, y=result.values, mode="lines+markers", name="Value", + )) + fig.add_hline(y=result.mean, line_color="green", annotation_text="Mean") + fig.add_hline(y=result.ucl, line_dash="dash", line_color="red", + annotation_text="UCL") + fig.add_hline(y=result.lcl, line_dash="dash", line_color="red", + annotation_text="LCL") + fig.add_hline(y=result.usl, line_dash="dot", line_color="orange", + annotation_text="USL") + fig.add_hline(y=result.lsl, line_dash="dot", line_color="orange", + annotation_text="LSL") + fig.update_layout( + title=f"Control Chart – {result.feature_name}", + xaxis_title="Sample #", yaxis_title="Value", + template="plotly_white", height=350, + ) + return fig.to_plotly_json() + + +def capability_bar(results: list[SPCResult]) -> dict: + """Capability index bar chart comparing all features.""" + names = [r.feature_name for r in results] + cpk_vals = [r.cpk if r.cpk is not None else 0.0 for r in results] + ppk_vals = [r.ppk if r.ppk is not None else 0.0 for r in results] + + colors = ["#2ecc71" if v >= 1.33 else "#f39c12" if v >= 1.0 else "#e74c3c" + for v in cpk_vals] + + fig = go.Figure() + fig.add_trace(go.Bar(x=names, y=cpk_vals, name="Cpk", marker_color=colors)) + fig.add_trace(go.Bar(x=names, y=ppk_vals, name="Ppk", marker_color="rgba(52,152,219,0.6)")) + fig.add_hline(y=1.33, line_dash="dash", line_color="green", + annotation_text="Cpk=1.33") + fig.add_hline(y=1.0, line_dash="dot", line_color="orange", + annotation_text="Cpk=1.0") + fig.update_layout( + title="Process Capability Summary", + xaxis_title="Feature", yaxis_title="Index", + barmode="group", template="plotly_white", height=400, + ) + return fig.to_plotly_json() + + +def generate_charts(results: list[SPCResult]) -> dict: + """Generate all charts for a set of SPC results.""" + charts: dict[str, list[dict] | dict] = { + "histograms": [], + 
"control_charts": [], + } + for r in results: + if r.n >= 2: + charts["histograms"].append(histogram(r)) + charts["control_charts"].append(control_chart(r)) + if results: + charts["capability_bar"] = capability_bar(results) + return charts diff --git a/app/analysis/spc.py b/app/analysis/spc.py new file mode 100644 index 0000000..bd52cfe --- /dev/null +++ b/app/analysis/spc.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +from scipy import stats + +from app.parsers.models import MeasurementRecord + +# d2 constants for subgroup sizes 2–10 (R-bar/d2 method) +_D2 = {2: 1.128, 3: 1.693, 4: 2.059, 5: 2.326, 6: 2.534, 7: 2.704, 8: 2.847, 9: 2.970, 10: 3.078} + + +@dataclass +class SPCResult: + feature_name: str + n: int + mean: float + std: float + min_val: float + max_val: float + usl: float + lsl: float + nominal: float + cp: float | None + cpk: float | None + pp: float | None + ppk: float | None + ucl: float + lcl: float + out_of_spec_count: int + shapiro_p: float | None + values: list[float] + + def to_dict(self) -> dict: + return { + "feature_name": self.feature_name, + "n": self.n, + "mean": round(self.mean, 6), + "std": round(self.std, 6), + "min": round(self.min_val, 6), + "max": round(self.max_val, 6), + "usl": round(self.usl, 6), + "lsl": round(self.lsl, 6), + "nominal": round(self.nominal, 6), + "cp": _r(self.cp), + "cpk": _r(self.cpk), + "pp": _r(self.pp), + "ppk": _r(self.ppk), + "ucl": round(self.ucl, 6), + "lcl": round(self.lcl, 6), + "out_of_spec_count": self.out_of_spec_count, + "shapiro_p": _r(self.shapiro_p), + "values": [round(v, 6) for v in self.values], + } + + +def _r(v: float | None) -> float | None: + return round(v, 4) if v is not None else None + + +def calculate_spc( + records: list[MeasurementRecord], subgroup_size: int = 5 +) -> list[SPCResult]: + """Calculate SPC metrics grouped by feature name.""" + groups: dict[str, list[MeasurementRecord]] = {} + for rec in records: + 
groups.setdefault(rec.feature_name, []).append(rec) + + results: list[SPCResult] = [] + for name, recs in groups.items(): + values = [r.actual for r in recs] + n = len(values) + if n < 1: + continue + + arr = np.array(values) + mean = float(np.mean(arr)) + usl = recs[0].usl + lsl = recs[0].lsl + nominal = recs[0].nominal + + if n < 2: + results.append( + SPCResult( + feature_name=name, n=n, mean=mean, std=0.0, + min_val=values[0], max_val=values[0], + usl=usl, lsl=lsl, nominal=nominal, + cp=None, cpk=None, pp=None, ppk=None, + ucl=mean, lcl=mean, + out_of_spec_count=sum(1 for v in values if v < lsl or v > usl), + shapiro_p=None, values=values, + ) + ) + continue + + std_overall = float(np.std(arr, ddof=1)) + tol_range = usl - lsl + + # Pp, Ppk (overall) + pp = tol_range / (6 * std_overall) if std_overall > 0 else None + ppk = ( + min((usl - mean), (mean - lsl)) / (3 * std_overall) + if std_overall > 0 + else None + ) + + # Cp, Cpk (within-subgroup using R-bar/d2) + std_within = _within_subgroup_sigma(arr, subgroup_size) + cp = tol_range / (6 * std_within) if std_within and std_within > 0 else None + cpk = ( + min((usl - mean), (mean - lsl)) / (3 * std_within) + if std_within and std_within > 0 + else None + ) + + # Control limits (X-bar chart, 3-sigma) + ucl = mean + 3 * std_overall + lcl = mean - 3 * std_overall + + # Shapiro-Wilk normality test (need 3 ≤ n ≤ 5000) + shapiro_p = None + if 3 <= n <= 5000: + _, shapiro_p = stats.shapiro(arr) + shapiro_p = float(shapiro_p) + + out_of_spec = sum(1 for v in values if v < lsl or v > usl) + + results.append( + SPCResult( + feature_name=name, n=n, mean=mean, std=std_overall, + min_val=float(np.min(arr)), max_val=float(np.max(arr)), + usl=usl, lsl=lsl, nominal=nominal, + cp=cp, cpk=cpk, pp=pp, ppk=ppk, + ucl=ucl, lcl=lcl, + out_of_spec_count=out_of_spec, + shapiro_p=shapiro_p, values=values, + ) + ) + return results + + +def _within_subgroup_sigma(arr: np.ndarray, subgroup_size: int) -> float | None: + """Estimate 
within-subgroup sigma using R-bar / d2 method.""" + n = len(arr) + if n < 2: + return None + + sg = min(subgroup_size, n) + d2 = _D2.get(sg) + if d2 is None: + # Fallback: use overall std if subgroup size not in d2 table + return float(np.std(arr, ddof=1)) + + ranges: list[float] = [] + for i in range(0, n - sg + 1, sg): + subgroup = arr[i : i + sg] + if len(subgroup) >= 2: + ranges.append(float(np.max(subgroup) - np.min(subgroup))) + + if not ranges: + return float(np.std(arr, ddof=1)) + + r_bar = float(np.mean(ranges)) + return r_bar / d2 diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..78bcfc9 --- /dev/null +++ b/app/config.py @@ -0,0 +1,14 @@ +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + azure_openai_endpoint: str = "" + azure_openai_api_key: str = "" + azure_openai_deployment: str = "gpt-4o" + azure_openai_api_version: str = "2024-10-21" + max_upload_mb: int = 50 + + model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} + + +settings = Settings() diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..ef2f2ae --- /dev/null +++ b/app/main.py @@ -0,0 +1,13 @@ +from pathlib import Path + +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles + +from app.routers import upload, results + +app = FastAPI(title="CMM Report Analyzer") + +app.include_router(upload.router, prefix="/api") +app.include_router(results.router, prefix="/api") + +app.mount("/", StaticFiles(directory=Path(__file__).parent / "static", html=True)) diff --git a/app/parsers/__init__.py b/app/parsers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/parsers/base.py b/app/parsers/base.py new file mode 100644 index 0000000..e6016df --- /dev/null +++ b/app/parsers/base.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import re +from abc import ABC, abstractmethod +from pathlib import Path + +from app.parsers.models import ParsedReport + +# Fuzzy column-name 
# Fuzzy column-name patterns → canonical field name.
# Checked in insertion order; the first pattern that matches wins.
COLUMN_PATTERNS: dict[str, list[re.Pattern[str]]] = {
    "feature_name": [
        re.compile(r"feat|char|dimen|label|id|name|item", re.I),
    ],
    "nominal": [
        re.compile(r"nom|target|blueprint|print", re.I),
    ],
    "tolerance_plus": [
        re.compile(r"tol.*\+|upper.*tol|\+.*tol|usl|dev.*\+|pos.*tol", re.I),
    ],
    "tolerance_minus": [
        re.compile(r"tol.*-|lower.*tol|-.*tol|lsl|dev.*-|neg.*tol", re.I),
    ],
    "actual": [
        re.compile(r"actual|meas|value|result|reading", re.I),
    ],
    "deviation": [
        re.compile(r"dev(?!.*tol)|diff|error|delta", re.I),
    ],
}


def match_column(header: str) -> str | None:
    """Return the canonical field name for a header string, or None."""
    header = header.strip()
    for field_name, patterns in COLUMN_PATTERNS.items():
        for pat in patterns:
            if pat.search(header):
                return field_name
    return None


class CMMParser(ABC):
    """Abstract base for format-specific CMM report parsers."""

    @abstractmethod
    def parse(self, path: Path) -> ParsedReport: ...


def get_parser(filename: str) -> CMMParser:
    """Select a parser by file extension.

    Raises:
        ValueError: when the extension is not a supported report format.
    """
    suffix = Path(filename).suffix.lower()
    if suffix == ".pdf":
        from app.parsers.pdf_parser import PDFParser
        return PDFParser()
    if suffix in (".xlsx", ".xls", ".csv"):
        from app.parsers.excel_parser import ExcelParser
        return ExcelParser()
    raise ValueError(f"Unsupported file type: {suffix}")


class ExcelParser(CMMParser):
    """Parser for tabular CMM reports (.csv, .xlsx, .xls)."""

    def parse(self, path: Path) -> ParsedReport:
        suffix = path.suffix.lower()
        if suffix == ".csv":
            df = pd.read_csv(path)
        elif suffix == ".xls":
            # BUGFIX: openpyxl cannot read legacy .xls files; let pandas
            # select the appropriate engine (xlrd) for this format.
            df = pd.read_excel(path)
        else:
            df = pd.read_excel(path, engine="openpyxl")

        col_map = self._map_columns(df.columns.tolist())
        measurements = self._extract(df, col_map)
        return ParsedReport(
            filename=path.name,
            measurements=measurements,
            metadata={"source": "excel", "rows": str(len(df))},
            raw_text=df.to_string(max_rows=200),
        )

    def _map_columns(self, headers: list[str]) -> dict[str, str]:
        """Map canonical field names to actual DataFrame column names."""
        mapping: dict[str, str] = {}
        for header in headers:
            canonical = match_column(str(header))
            # First matching header wins for each canonical field.
            if canonical and canonical not in mapping:
                mapping[canonical] = str(header)
        return mapping

    def _extract(
        self, df: pd.DataFrame, col_map: dict[str, str]
    ) -> list[MeasurementRecord]:
        """Build records from mapped columns; rows with bad values are skipped."""
        required = {"feature_name", "nominal", "actual"}
        if not required.issubset(col_map):
            return self._fallback_extract(df)

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = float(row[col_map["nominal"]])
                actual = float(row[col_map["actual"]])
                tol_plus = (
                    float(row[col_map["tolerance_plus"]])
                    if "tolerance_plus" in col_map
                    else 0.0
                )
                tol_minus = (
                    float(row[col_map["tolerance_minus"]])
                    if "tolerance_minus" in col_map
                    else 0.0
                )
                deviation = (
                    float(row[col_map["deviation"]])
                    if "deviation" in col_map
                    else actual - nominal
                )
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[col_map["feature_name"]]),
                        nominal=nominal,
                        # Normalize signs: plus-tolerance positive,
                        # minus-tolerance negative, whatever the sheet used.
                        tolerance_plus=abs(tol_plus),
                        tolerance_minus=-abs(tol_minus),
                        actual=actual,
                        deviation=deviation,
                    )
                )
            except (ValueError, TypeError):
                # Non-numeric cell (header repeats, blank rows): skip the row.
                continue
        return records

    def _fallback_extract(self, df: pd.DataFrame) -> list[MeasurementRecord]:
        """Best-effort extraction when column mapping is incomplete.

        Treats the first string column as the feature name and the first
        three numeric columns as nominal, actual, tolerance_plus (with
        tolerance_minus mirrored).
        """
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        str_cols = df.select_dtypes(include="object").columns.tolist()
        if len(numeric_cols) < 2 or not str_cols:
            return []

        name_col = str_cols[0]
        nom_col = numeric_cols[0]
        act_col = numeric_cols[1]
        tol_col = numeric_cols[2] if len(numeric_cols) > 2 else None

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = float(row[nom_col])
                actual = float(row[act_col])
                tol = float(row[tol_col]) if tol_col else 0.0
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[name_col]),
                        nominal=nominal,
                        tolerance_plus=abs(tol),
                        tolerance_minus=-abs(tol),
                        actual=actual,
                        deviation=actual - nominal,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records


@dataclass
class MeasurementRecord:
    """One measured feature: nominal, signed tolerances, and actual value."""

    feature_name: str
    nominal: float
    tolerance_plus: float    # stored non-negative
    tolerance_minus: float   # stored non-positive
    actual: float
    deviation: float = 0.0
    unit: str = "mm"

    @property
    def usl(self) -> float:
        """Upper spec limit: nominal + plus-tolerance."""
        return self.nominal + self.tolerance_plus

    @property
    def lsl(self) -> float:
        """Lower spec limit: nominal + minus-tolerance (tolerance_minus ≤ 0)."""
        return self.nominal + self.tolerance_minus

    @property
    def in_tolerance(self) -> bool:
        """True when the actual value lies within [LSL, USL] inclusive."""
        return self.lsl <= self.actual <= self.usl

    def to_dict(self) -> dict:
        return {
            "feature_name": self.feature_name,
            "nominal": self.nominal,
            "tolerance_plus": self.tolerance_plus,
            "tolerance_minus": self.tolerance_minus,
            "actual": self.actual,
            "deviation": self.deviation,
            "unit": self.unit,
            "usl": self.usl,
            "lsl": self.lsl,
            "in_tolerance": self.in_tolerance,
        }
@dataclass
class ParsedReport:
    """One parsed CMM report: measurement records plus document metadata."""

    filename: str
    measurements: list[MeasurementRecord] = field(default_factory=list)
    metadata: dict[str, str] = field(default_factory=dict)
    raw_text: str = ""  # truncated source text, used as AI-prompt context

    @property
    def out_of_tolerance(self) -> list[MeasurementRecord]:
        """Measurements whose actual value falls outside [LSL, USL]."""
        return [m for m in self.measurements if not m.in_tolerance]

    def to_dict(self) -> dict:
        return {
            "filename": self.filename,
            "metadata": self.metadata,
            "measurement_count": len(self.measurements),
            "out_of_tolerance_count": len(self.out_of_tolerance),
            "measurements": [m.to_dict() for m in self.measurements],
        }


class PDFParser(CMMParser):
    """Parser for PDF CMM reports: tables via pdfplumber, metadata via regex."""

    def parse(self, path: Path) -> ParsedReport:
        text_parts: list[str] = []
        all_rows: list[dict[str, str | None]] = []
        headers: list[str] = []

        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text() or ""
                text_parts.append(page_text)

                for table in page.extract_tables():
                    if not table or not table[0]:
                        continue
                    # First table's first row defines the headers; later
                    # tables are assumed to continue the same layout.
                    if not headers:
                        headers = [str(c or "").strip() for c in table[0]]
                        data_rows = table[1:]
                    else:
                        data_rows = table
                    for row in data_rows:
                        if row and any(cell for cell in row):
                            all_rows.append(
                                {
                                    headers[i]: (str(cell).strip() if cell else None)
                                    for i, cell in enumerate(row)
                                    if i < len(headers)
                                }
                            )

        raw_text = "\n".join(text_parts)
        col_map = {match_column(h): h for h in headers if match_column(h)}
        measurements = self._extract(all_rows, col_map)
        metadata = self._extract_metadata(raw_text)
        metadata["source"] = "pdf"

        return ParsedReport(
            filename=path.name,
            measurements=measurements,
            metadata=metadata,
            raw_text=raw_text[:10_000],  # cap prompt/context size
        )

    def _extract(
        self,
        rows: list[dict[str, str | None]],
        col_map: dict[str | None, str],
    ) -> list[MeasurementRecord]:
        """Build records from mapped table rows; unparseable rows are skipped."""
        required = {"feature_name", "nominal", "actual"}
        if not required.issubset(col_map):
            return self._fallback_extract(rows)

        records: list[MeasurementRecord] = []
        for row in rows:
            try:
                name = row.get(col_map["feature_name"]) or ""
                nominal = _to_float(row.get(col_map["nominal"]))
                actual = _to_float(row.get(col_map["actual"]))
                if nominal is None or actual is None or not name:
                    continue
                # BUGFIX: the original used `_to_float(...) or fallback`,
                # which silently replaced a genuine 0.0 cell value with the
                # fallback (0.0 is falsy). Use explicit None checks instead.
                tol_plus = _to_float(row.get(col_map.get("tolerance_plus", ""), ""))
                tol_plus = 0.0 if tol_plus is None else tol_plus
                tol_minus = _to_float(row.get(col_map.get("tolerance_minus", ""), ""))
                tol_minus = 0.0 if tol_minus is None else tol_minus
                dev_raw = _to_float(row.get(col_map.get("deviation", ""), ""))
                deviation = actual - nominal if dev_raw is None else dev_raw
                records.append(
                    MeasurementRecord(
                        feature_name=name,
                        nominal=nominal,
                        tolerance_plus=abs(tol_plus),
                        tolerance_minus=-abs(tol_minus),
                        actual=actual,
                        deviation=deviation,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records

    def _fallback_extract(
        self, rows: list[dict[str, str | None]]
    ) -> list[MeasurementRecord]:
        """Try to extract from rows even without full column mapping."""
        if not rows:
            return []
        headers = list(rows[0].keys())
        # Heuristic: first string-looking column = name, then numeric columns.
        numeric_cols: list[str] = []
        name_col: str | None = None
        for h in headers:
            sample_vals = [r.get(h) for r in rows[:5] if r.get(h)]
            if sample_vals and all(_to_float(v) is not None for v in sample_vals):
                numeric_cols.append(h)
            elif name_col is None and sample_vals:
                name_col = h
        if not name_col or len(numeric_cols) < 2:
            return []

        records: list[MeasurementRecord] = []
        for row in rows:
            try:
                name = row.get(name_col) or ""
                nominal = _to_float(row.get(numeric_cols[0]))
                actual = _to_float(row.get(numeric_cols[1]))
                if nominal is None or actual is None or not name:
                    continue
                tol = _to_float(row.get(numeric_cols[2])) if len(numeric_cols) > 2 else 0.0
                tol = 0.0 if tol is None else tol
                records.append(
                    MeasurementRecord(
                        feature_name=name,
                        nominal=nominal,
                        tolerance_plus=abs(tol),
                        tolerance_minus=-abs(tol),
                        actual=actual,
                        deviation=actual - nominal,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records

    def _extract_metadata(self, text: str) -> dict[str, str]:
        """Scrape common report-header fields (part/serial/date/...) via regex."""
        metadata: dict[str, str] = {}
        import re

        for pattern, key in [
            (r"(?i)part\s*(?:no|number|#|:)\s*[:\s]*(\S+)", "part_number"),
            (r"(?i)serial\s*(?:no|number|#|:)\s*[:\s]*(\S+)", "serial_number"),
            (r"(?i)date\s*[:\s]+(\d[\d/\-\.]+\d)", "inspection_date"),
            (r"(?i)program\s*[:\s]+(.+?)(?:\n|$)", "program"),
            (r"(?i)operator\s*[:\s]+(.+?)(?:\n|$)", "operator"),
        ]:
            m = re.search(pattern, text)
            if m:
                metadata[key] = m.group(1).strip()
        return metadata


def _to_float(val: str | None) -> float | None:
    """Parse a table cell to float; tolerates thousands commas. None on failure."""
    if val is None:
        return None
    val = val.strip().replace(",", "")
    try:
        return float(val)
    except ValueError:
        return None


router = APIRouter()


@router.get("/results/{batch_id}")
async def get_results(batch_id: str):
    """Return the processed results for a batch; 404 for unknown ids."""
    batch = get_batch(batch_id)
    if batch is None:
        raise HTTPException(404, "Batch not found")
    return batch.to_dict()
router = APIRouter()

ALLOWED_EXTENSIONS = {".pdf", ".xlsx", ".xls", ".csv"}


@router.post("/upload")
async def upload_files(files: list[UploadFile]):
    """Accept report uploads, validate type and size, stage to a temp dir,
    and kick off batch processing.

    Raises:
        HTTPException(400): no files, unsupported extension, oversize file,
            or nothing usable after filtering.
    """
    if not files:
        raise HTTPException(400, "No files provided")

    saved: list[Path] = []
    tmp_dir = Path(tempfile.mkdtemp(prefix="cmm_"))

    for f in files:
        if not f.filename:
            continue
        # SECURITY: keep only the basename — a crafted client filename such
        # as "../../etc/x.csv" must not escape the staging directory
        # (path traversal). The original wrote tmp_dir / f.filename directly.
        safe_name = Path(f.filename).name
        ext = Path(safe_name).suffix.lower()
        if ext not in ALLOWED_EXTENSIONS:
            raise HTTPException(
                400, f"Unsupported file type: {ext}. Allowed: {ALLOWED_EXTENSIONS}"
            )
        size = 0
        dest = tmp_dir / safe_name
        with open(dest, "wb") as out:
            # Stream in 64 KiB chunks so a huge upload never sits in memory.
            while chunk := await f.read(1024 * 64):
                size += len(chunk)
                if size > settings.max_upload_mb * 1024 * 1024:
                    out.close()
                    dest.unlink(missing_ok=True)  # drop the partial file
                    raise HTTPException(
                        400, f"File too large (max {settings.max_upload_mb} MB)"
                    )
                out.write(chunk)
        saved.append(dest)

    if not saved:
        raise HTTPException(400, "No valid files uploaded")

    batch_id = await process_batch(saved)
    return {"batch_id": batch_id, "file_count": len(saved)}
"spc": self.spc, + "charts": self.charts, + "summary": self.summary, + "error": self.error, + } + + +@dataclass +class BatchResult: + batch_id: str + status: str = "processing" + files: list[FileResult] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "batch_id": self.batch_id, + "status": self.status, + "files": [f.to_dict() for f in self.files], + } + + +async def process_batch(file_paths: list[Path]) -> str: + """Process a batch of files and return the batch_id.""" + batch_id = uuid.uuid4().hex[:12] + batch = BatchResult(batch_id=batch_id) + _store[batch_id] = batch + + tasks = [_process_single(path) for path in file_paths] + results = await asyncio.gather(*tasks, return_exceptions=True) + + for result in results: + if isinstance(result, Exception): + logger.error("File processing failed: %s", result) + batch.files.append( + FileResult( + filename="unknown", + report={}, spc=[], charts={}, summary="", + error=str(result), + ) + ) + else: + batch.files.append(result) + + batch.status = "complete" + return batch_id + + +async def _process_single(path: Path) -> FileResult: + """Parse → SPC → Charts → AI summary for a single file.""" + try: + parser = get_parser(path.name) + report: ParsedReport = parser.parse(path) + + spc_results: list[SPCResult] = calculate_spc(report.measurements) + charts = generate_charts(spc_results) + summary = await summarize(report, spc_results) + + return FileResult( + filename=report.filename, + report=report.to_dict(), + spc=[s.to_dict() for s in spc_results], + charts=charts, + summary=summary, + ) + except Exception as exc: + logger.exception("Error processing %s", path.name) + return FileResult( + filename=path.name, + report={}, spc=[], charts={}, summary="", + error=str(exc), + ) + + +def get_batch(batch_id: str) -> BatchResult | None: + return _store.get(batch_id) diff --git a/app/static/css/style.css b/app/static/css/style.css new file mode 100644 index 0000000..650285d --- /dev/null +++ 
b/app/static/css/style.css @@ -0,0 +1,247 @@ +:root { + --bg: #f4f6f8; + --surface: #ffffff; + --primary: #1a5276; + --primary-light: #2980b9; + --accent: #2ecc71; + --danger: #e74c3c; + --warn: #f39c12; + --text: #2c3e50; + --text-muted: #7f8c8d; + --border: #dce1e6; + --radius: 8px; +} + +*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; + background: var(--bg); + color: var(--text); + line-height: 1.6; +} + +header { + background: var(--primary); + color: #fff; + padding: 1.5rem 2rem; +} + +header h1 { + font-size: 1.5rem; + font-weight: 600; + letter-spacing: 0.02em; +} + +.subtitle { + color: rgba(255, 255, 255, 0.7); + font-size: 0.9rem; + margin-top: 0.25rem; +} + +main { + max-width: 1200px; + margin: 0 auto; + padding: 2rem 1.5rem; +} + +/* Drop zone */ +#drop-zone { + border: 2px dashed var(--border); + border-radius: var(--radius); + padding: 3rem 2rem; + text-align: center; + cursor: pointer; + transition: border-color 0.2s, background 0.2s; + background: var(--surface); +} + +#drop-zone:hover, #drop-zone.dragover { + border-color: var(--primary-light); + background: rgba(41, 128, 185, 0.04); +} + +.drop-content svg { color: var(--text-muted); margin-bottom: 0.75rem; } +.drop-content p { color: var(--text); font-size: 1rem; } +.drop-content .hint { color: var(--text-muted); font-size: 0.85rem; margin-top: 0.25rem; } + +/* File list */ +#file-list { + margin-top: 1rem; + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.file-tag { + background: var(--primary); + color: #fff; + padding: 0.3rem 0.75rem; + border-radius: 999px; + font-size: 0.85rem; + display: inline-flex; + align-items: center; + gap: 0.4rem; +} + +.file-tag .remove { + cursor: pointer; + opacity: 0.7; + font-weight: bold; +} + +.file-tag .remove:hover { opacity: 1; } + +/* Upload button */ +#upload-btn { + margin-top: 1rem; + padding: 0.6rem 
2rem; + background: var(--primary); + color: #fff; + border: none; + border-radius: var(--radius); + font-size: 1rem; + cursor: pointer; + transition: background 0.2s; +} + +#upload-btn:hover:not(:disabled) { background: var(--primary-light); } +#upload-btn:disabled { opacity: 0.5; cursor: not-allowed; } + +/* Spinner */ +#status-section { + text-align: center; + padding: 3rem 0; +} + +.spinner { + width: 40px; + height: 40px; + border: 4px solid var(--border); + border-top-color: var(--primary); + border-radius: 50%; + margin: 0 auto 1rem; + animation: spin 0.8s linear infinite; +} + +@keyframes spin { to { transform: rotate(360deg); } } + +/* Results */ +.file-result { + background: var(--surface); + border-radius: var(--radius); + padding: 1.5rem; + margin-bottom: 1.5rem; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08); +} + +.file-result h2 { + font-size: 1.2rem; + color: var(--primary); + margin-bottom: 1rem; + padding-bottom: 0.5rem; + border-bottom: 2px solid var(--border); +} + +.file-result .error { + color: var(--danger); + font-weight: 600; +} + +/* SPC table */ +.spc-table { + width: 100%; + border-collapse: collapse; + font-size: 0.85rem; + margin-bottom: 1.5rem; + overflow-x: auto; + display: block; +} + +.spc-table th, .spc-table td { + padding: 0.5rem 0.75rem; + text-align: right; + border-bottom: 1px solid var(--border); + white-space: nowrap; +} + +.spc-table th { + background: var(--bg); + font-weight: 600; + text-align: right; + position: sticky; + top: 0; +} + +.spc-table th:first-child, .spc-table td:first-child { text-align: left; } + +.spc-table tr:hover td { background: rgba(41, 128, 185, 0.04); } + +.cpk-good { color: var(--accent); font-weight: 600; } +.cpk-warn { color: var(--warn); font-weight: 600; } +.cpk-bad { color: var(--danger); font-weight: 600; } + +/* Summary */ +.summary { + background: var(--bg); + border-left: 4px solid var(--primary); + padding: 1rem 1.25rem; + margin-bottom: 1.5rem; + border-radius: 0 var(--radius) 
var(--radius) 0; + white-space: pre-wrap; + font-size: 0.9rem; + line-height: 1.7; +} + +/* Charts */ +.charts-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 1rem; + margin-bottom: 1.5rem; +} + +.chart-container { + background: var(--surface); + border: 1px solid var(--border); + border-radius: var(--radius); + overflow: hidden; +} + +.chart-full { grid-column: 1 / -1; } + +/* Measurements table */ +.meas-toggle { + background: none; + border: 1px solid var(--border); + padding: 0.4rem 1rem; + border-radius: var(--radius); + cursor: pointer; + font-size: 0.85rem; + color: var(--primary); + margin-bottom: 1rem; +} + +.meas-toggle:hover { background: var(--bg); } + +.meas-table { + width: 100%; + border-collapse: collapse; + font-size: 0.8rem; +} + +.meas-table th, .meas-table td { + padding: 0.4rem 0.6rem; + border-bottom: 1px solid var(--border); + text-align: right; +} + +.meas-table th { background: var(--bg); font-weight: 600; } +.meas-table th:first-child, .meas-table td:first-child { text-align: left; } + +.meas-table .oot { background: rgba(231, 76, 60, 0.08); color: var(--danger); } + +@media (max-width: 768px) { + .charts-grid { grid-template-columns: 1fr; } + main { padding: 1rem; } +} diff --git a/app/static/index.html b/app/static/index.html new file mode 100644 index 0000000..229201b --- /dev/null +++ b/app/static/index.html @@ -0,0 +1,49 @@ + + + + + + CMM Report Analyzer + + + + +
+

CMM Report Analyzer

+

Upload CMM inspection reports for SPC analysis and AI-powered summaries

+
+ +
+ +
+
+
+ + + + + +

Drag & drop PDF or Excel files here

+

or click to browse — .pdf, .xlsx, .xls, .csv

+
+ +
+
+ +
+ + + + + + +
+ + + + diff --git a/app/static/js/app.js b/app/static/js/app.js new file mode 100644 index 0000000..52d24c1 --- /dev/null +++ b/app/static/js/app.js @@ -0,0 +1,204 @@ +(() => { + const dropZone = document.getElementById("drop-zone"); + const fileInput = document.getElementById("file-input"); + const fileList = document.getElementById("file-list"); + const uploadBtn = document.getElementById("upload-btn"); + const uploadSection = document.getElementById("upload-section"); + const statusSection = document.getElementById("status-section"); + const statusText = document.getElementById("status-text"); + const resultsSection = document.getElementById("results-section"); + const resultsContainer = document.getElementById("results-container"); + + let selectedFiles = []; + + // Drag & drop + dropZone.addEventListener("click", () => fileInput.click()); + dropZone.addEventListener("dragover", (e) => { e.preventDefault(); dropZone.classList.add("dragover"); }); + dropZone.addEventListener("dragleave", () => dropZone.classList.remove("dragover")); + dropZone.addEventListener("drop", (e) => { + e.preventDefault(); + dropZone.classList.remove("dragover"); + addFiles(e.dataTransfer.files); + }); + fileInput.addEventListener("change", () => addFiles(fileInput.files)); + + function addFiles(files) { + for (const f of files) { + if (!selectedFiles.some((s) => s.name === f.name && s.size === f.size)) { + selectedFiles.push(f); + } + } + renderFileList(); + } + + function renderFileList() { + fileList.innerHTML = ""; + selectedFiles.forEach((f, i) => { + const tag = document.createElement("span"); + tag.className = "file-tag"; + tag.innerHTML = `${f.name} ×`; + fileList.appendChild(tag); + }); + fileList.querySelectorAll(".remove").forEach((btn) => + btn.addEventListener("click", (e) => { + selectedFiles.splice(+e.target.dataset.idx, 1); + renderFileList(); + }) + ); + uploadBtn.disabled = selectedFiles.length === 0; + } + + // Upload + uploadBtn.addEventListener("click", async () => 
{ + if (!selectedFiles.length) return; + + uploadSection.hidden = true; + statusSection.hidden = false; + resultsSection.hidden = true; + statusText.textContent = `Uploading ${selectedFiles.length} file(s)...`; + + const form = new FormData(); + selectedFiles.forEach((f) => form.append("files", f)); + + try { + const resp = await fetch("/api/upload", { method: "POST", body: form }); + if (!resp.ok) { + const err = await resp.json().catch(() => ({ detail: resp.statusText })); + throw new Error(err.detail || "Upload failed"); + } + const { batch_id } = await resp.json(); + statusText.textContent = "Analyzing..."; + await pollResults(batch_id); + } catch (err) { + statusSection.hidden = true; + uploadSection.hidden = false; + alert("Error: " + err.message); + } + }); + + async function pollResults(batchId) { + const maxAttempts = 60; + for (let i = 0; i < maxAttempts; i++) { + const resp = await fetch(`/api/results/${batchId}`); + if (!resp.ok) { await sleep(1000); continue; } + const data = await resp.json(); + if (data.status === "complete") { + renderResults(data); + return; + } + statusText.textContent = `Analyzing... (${i + 1}s)`; + await sleep(1000); + } + statusSection.hidden = true; + uploadSection.hidden = false; + alert("Timed out waiting for results"); + } + + function sleep(ms) { return new Promise((r) => setTimeout(r, ms)); } + + // Render + function renderResults(data) { + statusSection.hidden = true; + resultsSection.hidden = false; + resultsContainer.innerHTML = ""; + + for (const file of data.files) { + const div = document.createElement("div"); + div.className = "file-result"; + + if (file.error) { + div.innerHTML = `

${esc(file.filename)}

Error: ${esc(file.error)}

`; + resultsContainer.appendChild(div); + continue; + } + + let html = `

${esc(file.filename)}

`; + + // AI Summary + html += `
${esc(file.summary)}
`; + + // SPC table + if (file.spc.length) { + html += ` + + + + `; + for (const s of file.spc) { + const cpkClass = s.cpk === null ? "" : s.cpk >= 1.33 ? "cpk-good" : s.cpk >= 1.0 ? "cpk-warn" : "cpk-bad"; + html += ` + + + + + + `; + } + html += `
FeaturenMeanStdCpCpkPpPpkUSLLSLOOS
${esc(s.feature_name)}${s.n}${s.mean}${s.std}${fmtIdx(s.cp)}${fmtIdx(s.cpk)}${fmtIdx(s.pp)}${fmtIdx(s.ppk)}${s.usl}${s.lsl}${s.out_of_spec_count}
`; + } + + // Charts + html += `
`; + const histDivs = (file.charts.histograms || []).map((_, i) => `hist-${data.batch_id}-${file.filename}-${i}`); + const ctrlDivs = (file.charts.control_charts || []).map((_, i) => `ctrl-${data.batch_id}-${file.filename}-${i}`); + const capDiv = file.charts.capability_bar ? `cap-${data.batch_id}-${file.filename}` : null; + + histDivs.forEach((id) => { html += `
`; }); + ctrlDivs.forEach((id) => { html += `
`; }); + if (capDiv) html += `
`; + html += `
`; + + // Measurements toggle + if (file.report.measurements && file.report.measurements.length) { + const tableId = `meas-${data.batch_id}-${file.filename}`; + html += ``; + html += ``; + } + + div.innerHTML = html; + resultsContainer.appendChild(div); + + // Render Plotly charts after DOM insertion + requestAnimationFrame(() => { + (file.charts.histograms || []).forEach((chart, i) => { + Plotly.newPlot(histDivs[i], chart.data, { ...chart.layout, autosize: true }, { responsive: true }); + }); + (file.charts.control_charts || []).forEach((chart, i) => { + Plotly.newPlot(ctrlDivs[i], chart.data, { ...chart.layout, autosize: true }, { responsive: true }); + }); + if (capDiv && file.charts.capability_bar) { + const cap = file.charts.capability_bar; + Plotly.newPlot(capDiv, cap.data, { ...cap.layout, autosize: true }, { responsive: true }); + } + }); + } + + // Reset for new uploads + selectedFiles = []; + renderFileList(); + uploadSection.hidden = false; + } + + function esc(s) { + const d = document.createElement("div"); + d.textContent = s || ""; + return d.innerHTML; + } + + function fmtIdx(v) { return v === null || v === undefined ? 
"N/A" : v.toFixed(3); } +})(); diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0692bb7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[project] +name = "cmm-report-analyzer" +version = "0.1.0" +description = "CMM inspection report analyzer with SPC metrics and AI summaries" +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.115", + "uvicorn[standard]>=0.32", + "pdfplumber>=0.11", + "pandas>=2.2", + "openpyxl>=3.1", + "numpy>=2.0", + "scipy>=1.14", + "plotly>=5.24", + "openai>=1.50", + "pydantic-settings>=2.6", + "python-multipart>=0.0.12", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0", + "pytest-asyncio>=0.24", + "httpx>=0.27", +] + +[tool.pytest.ini_options] +asyncio_mode = "auto" + +[build-system] +requires = ["setuptools>=75"] +build-backend = "setuptools.build_meta" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..977265c --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,72 @@ +import io +import tempfile +from pathlib import Path + +import pandas as pd +import pytest +from httpx import ASGITransport, AsyncClient + +from app.main import app + + +@pytest.fixture +def sample_excel() -> bytes: + df = pd.DataFrame({ + "Feature Name": ["D1", "D1", "D1", "D2", "D2", "D2"], + "Nominal": [10.0, 10.0, 10.0, 20.0, 20.0, 20.0], + "Actual": [10.02, 9.99, 10.01, 20.05, 19.97, 20.02], + "Tol+": [0.05, 0.05, 0.05, 0.10, 0.10, 0.10], + "Tol-": [-0.05, -0.05, -0.05, -0.10, -0.10, -0.10], + }) + buf = io.BytesIO() + df.to_excel(buf, index=False) + buf.seek(0) + return buf.read() + + +@pytest.mark.asyncio +async def test_upload_and_results(sample_excel): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post( + "/api/upload", + files=[("files", ("test.xlsx", sample_excel, 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"))], + ) + assert resp.status_code == 200 + data = resp.json() + assert "batch_id" in data + + resp2 = await client.get(f"/api/results/{data['batch_id']}") + assert resp2.status_code == 200 + result = resp2.json() + assert result["status"] == "complete" + assert len(result["files"]) == 1 + assert result["files"][0]["filename"] == "test.xlsx" + assert len(result["files"][0]["spc"]) == 2 # D1 and D2 + + +@pytest.mark.asyncio +async def test_upload_no_files(): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post("/api/upload", files=[]) + assert resp.status_code in (400, 422) + + +@pytest.mark.asyncio +async def test_upload_unsupported_type(): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.post( + "/api/upload", + files=[("files", ("test.png", b"fake", "image/png"))], + ) + assert resp.status_code == 400 + + +@pytest.mark.asyncio +async def test_results_not_found(): + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + resp = await client.get("/api/results/nonexistent") + assert resp.status_code == 404 diff --git a/tests/test_parsers.py b/tests/test_parsers.py new file mode 100644 index 0000000..f988cb8 --- /dev/null +++ b/tests/test_parsers.py @@ -0,0 +1,90 @@ +import tempfile +from pathlib import Path + +import pandas as pd + +from app.parsers.base import get_parser, match_column +from app.parsers.excel_parser import ExcelParser +from app.parsers.models import MeasurementRecord + + +def test_match_column_nominal(): + assert match_column("Nominal") == "nominal" + assert match_column("NOM") == "nominal" + assert match_column("Target Value") == "nominal" + + +def test_match_column_actual(): + assert match_column("Actual") == "actual" + assert 
match_column("Measured") == "actual" + + +def test_match_column_unknown(): + assert match_column("random_xyz") is None + + +def test_get_parser_pdf(): + p = get_parser("report.pdf") + from app.parsers.pdf_parser import PDFParser + assert isinstance(p, PDFParser) + + +def test_get_parser_excel(): + p = get_parser("data.xlsx") + assert isinstance(p, ExcelParser) + + +def test_get_parser_csv(): + p = get_parser("data.csv") + assert isinstance(p, ExcelParser) + + +def test_get_parser_unsupported(): + try: + get_parser("image.png") + assert False, "Should have raised" + except ValueError: + pass + + +def test_measurement_record_properties(): + rec = MeasurementRecord( + feature_name="D1", + nominal=10.0, + tolerance_plus=0.05, + tolerance_minus=-0.05, + actual=10.02, + ) + assert rec.usl == 10.05 + assert rec.lsl == 9.95 + assert rec.in_tolerance is True + + +def test_measurement_record_out_of_tolerance(): + rec = MeasurementRecord( + feature_name="D1", + nominal=10.0, + tolerance_plus=0.05, + tolerance_minus=-0.05, + actual=10.10, + ) + assert rec.in_tolerance is False + + +def test_excel_parser_with_standard_headers(): + df = pd.DataFrame({ + "Feature Name": ["D1", "D2", "D3"], + "Nominal": [10.0, 20.0, 30.0], + "Actual": [10.02, 19.98, 30.05], + "Tol+": [0.05, 0.10, 0.10], + "Tol-": [-0.05, -0.10, -0.10], + }) + with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f: + df.to_excel(f.name, index=False) + parser = ExcelParser() + report = parser.parse(Path(f.name)) + + assert len(report.measurements) == 3 + assert report.measurements[0].feature_name == "D1" + assert report.measurements[0].nominal == 10.0 + assert report.measurements[0].actual == 10.02 diff --git a/tests/test_spc.py b/tests/test_spc.py new file mode 100644 index 0000000..1998c94 --- /dev/null +++ b/tests/test_spc.py @@ -0,0 +1,84 @@ +from app.analysis.spc import SPCResult, calculate_spc +from app.parsers.models import MeasurementRecord + + +def _make_records(name: str, nominal: float, tol: 
float, actuals: list[float]): + return [ + MeasurementRecord( + feature_name=name, + nominal=nominal, + tolerance_plus=tol, + tolerance_minus=-tol, + actual=a, + deviation=a - nominal, + ) + for a in actuals + ] + + +def test_single_measurement_returns_none_indices(): + records = _make_records("D1", 10.0, 0.05, [10.01]) + results = calculate_spc(records) + assert len(results) == 1 + r = results[0] + assert r.cp is None + assert r.cpk is None + assert r.pp is None + assert r.ppk is None + + +def test_basic_spc_calculation(): + actuals = [10.01, 10.02, 9.99, 10.00, 10.03, 9.98, 10.01, 10.02, 9.99, 10.00] + records = _make_records("D1", 10.0, 0.05, actuals) + results = calculate_spc(records) + + assert len(results) == 1 + r = results[0] + assert r.n == 10 + assert r.pp is not None + assert r.ppk is not None + assert r.cp is not None + assert r.cpk is not None + assert r.pp > 0 + assert r.cpk > 0 + assert r.out_of_spec_count == 0 + + +def test_out_of_spec_count(): + actuals = [10.0, 10.06, 9.94, 10.0, 10.0] # 10.06 and 9.94 outside ±0.05 + records = _make_records("D1", 10.0, 0.05, actuals) + results = calculate_spc(records) + assert results[0].out_of_spec_count == 2 + + +def test_multiple_features(): + records = ( + _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99]) + + _make_records("D2", 20.0, 0.10, [20.05, 19.95, 20.01]) + ) + results = calculate_spc(records) + assert len(results) == 2 + names = {r.feature_name for r in results} + assert names == {"D1", "D2"} + + +def test_shapiro_not_computed_for_small_n(): + records = _make_records("D1", 10.0, 0.05, [10.01, 10.02]) + results = calculate_spc(records) + assert results[0].shapiro_p is None + + +def test_shapiro_computed_for_n_ge_3(): + records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99]) + results = calculate_spc(records) + assert results[0].shapiro_p is not None + + +def test_to_dict(): + records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99, 10.00, 10.03]) + results = calculate_spc(records) + 
d = results[0].to_dict() + assert "feature_name" in d + assert "cpk" in d + assert "values" in d + assert isinstance(d["values"], list)