Initial commit: CMM Report Analyzer

FastAPI app that parses CMM inspection reports (PDF/Excel/CSV), computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk), generates interactive Plotly charts, and provides AI-powered quality summaries via Azure OpenAI with graceful fallback. Includes 21 passing tests covering parsers, SPC calculations, and API endpoints.
2026-02-19 10:38:51 -06:00
commit 9abf9b4b58
28 changed files with 1727 additions and 0 deletions
--- a/app/parsers/excel_parser.py
+++ b/app/parsers/excel_parser.py
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+
+from app.parsers.base import CMMParser, match_column
+from app.parsers.models import MeasurementRecord, ParsedReport
+
+
+class ExcelParser(CMMParser):
+    def parse(self, path: Path) -> ParsedReport:
+        if path.suffix.lower() == ".csv":
+            df = pd.read_csv(path)
+        else:
+            df = pd.read_excel(path, engine="openpyxl")
+
+        col_map = self._map_columns(df.columns.tolist())
+        measurements = self._extract(df, col_map)
+        return ParsedReport(
+            filename=path.name,
+            measurements=measurements,
+            metadata={"source": "excel", "rows": str(len(df))},
+            raw_text=df.to_string(max_rows=200),
+        )
+
+    def _map_columns(self, headers: list[str]) -> dict[str, str]:
+        """Map canonical field names to actual DataFrame column names."""
+        mapping: dict[str, str] = {}
+        for header in headers:
+            canonical = match_column(str(header))
+            if canonical and canonical not in mapping:
+                mapping[canonical] = str(header)
+        return mapping
+
+    def _extract(
+        self, df: pd.DataFrame, col_map: dict[str, str]
+    ) -> list[MeasurementRecord]:
+        required = {"feature_name", "nominal", "actual"}
+        if not required.issubset(col_map):
+            return self._fallback_extract(df)
+
+        records: list[MeasurementRecord] = []
+        for _, row in df.iterrows():
+            try:
+                nominal = float(row[col_map["nominal"]])
+                actual = float(row[col_map["actual"]])
+                tol_plus = (
+                    float(row[col_map["tolerance_plus"]])
+                    if "tolerance_plus" in col_map
+                    else 0.0
+                )
+                tol_minus = (
+                    float(row[col_map["tolerance_minus"]])
+                    if "tolerance_minus" in col_map
+                    else 0.0
+                )
+                deviation = (
+                    float(row[col_map["deviation"]])
+                    if "deviation" in col_map
+                    else actual - nominal
+                )
+                records.append(
+                    MeasurementRecord(
+                        feature_name=str(row[col_map["feature_name"]]),
+                        nominal=nominal,
+                        tolerance_plus=abs(tol_plus),
+                        tolerance_minus=-abs(tol_minus),
+                        actual=actual,
+                        deviation=deviation,
+                    )
+                )
+            except (ValueError, TypeError):
+                continue
+        return records
+
+    def _fallback_extract(self, df: pd.DataFrame) -> list[MeasurementRecord]:
+        """Best-effort extraction when column mapping is incomplete.
+
+        Treats the first string column as the feature name and the first
+        three numeric columns as nominal, actual, tolerance_plus (with
+        tolerance_minus mirrored).
+        """
+        numeric_cols = df.select_dtypes(include="number").columns.tolist()
+        str_cols = df.select_dtypes(include="object").columns.tolist()
+        if len(numeric_cols) < 2 or not str_cols:
+            return []
+
+        name_col = str_cols[0]
+        nom_col = numeric_cols[0]
+        act_col = numeric_cols[1]
+        tol_col = numeric_cols[2] if len(numeric_cols) > 2 else None
+
+        records: list[MeasurementRecord] = []
+        for _, row in df.iterrows():
+            try:
+                nominal = float(row[nom_col])
+                actual = float(row[act_col])
+                tol = float(row[tol_col]) if tol_col else 0.0
+                records.append(
+                    MeasurementRecord(
+                        feature_name=str(row[name_col]),
+                        nominal=nominal,
+                        tolerance_plus=abs(tol),
+                        tolerance_minus=-abs(tol),
+                        actual=actual,
+                        deviation=actual - nominal,
+                    )
+                )
+            except (ValueError, TypeError):
+                continue
+        return records