Initial commit: CMM Report Analyzer
FastAPI app that parses CMM inspection reports (PDF/Excel/CSV), computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk), generates interactive Plotly charts, and provides AI-powered quality summaries via Azure OpenAI with graceful fallback. Includes 21 passing tests covering parsers, SPC calculations, and API endpoints.
This commit is contained in:
112
app/parsers/excel_parser.py
Normal file
112
app/parsers/excel_parser.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from app.parsers.base import CMMParser, match_column
|
||||
from app.parsers.models import MeasurementRecord, ParsedReport
|
||||
|
||||
|
||||
class ExcelParser(CMMParser):
    """Parser for tabular CMM reports stored as Excel workbooks or CSV files."""

    def parse(self, path: Path) -> ParsedReport:
        """Load *path* into a DataFrame and convert its rows to measurements.

        A ``.csv`` suffix (case-insensitive) is read with ``pandas.read_csv``;
        any other suffix is read with ``pandas.read_excel`` using the
        ``openpyxl`` engine.

        Args:
            path: Location of the report file.

        Returns:
            A ``ParsedReport`` holding the file name, the extracted
            measurements, a metadata dict (source tag and row count as a
            string) and a text dump of the table limited to 200 rows.
        """
        if path.suffix.lower() == ".csv":
            df = pd.read_csv(path)
        else:
            df = pd.read_excel(path, engine="openpyxl")

        col_map = self._map_columns(df.columns.tolist())
        measurements = self._extract(df, col_map)
        return ParsedReport(
            filename=path.name,
            measurements=measurements,
            # NOTE: the source tag is "excel" for CSV input as well.
            metadata={"source": "excel", "rows": str(len(df))},
            raw_text=df.to_string(max_rows=200),
        )

    def _map_columns(self, headers: list[str]) -> dict[str, str]:
        """Map canonical field names to actual DataFrame column names.

        Each header is stringified and passed to ``match_column``; a truthy
        result is used as the canonical name. When several headers resolve to
        the same canonical name, the first one wins.
        """
        mapping: dict[str, str] = {}
        for header in headers:
            label = str(header)
            canonical = match_column(label)
            if canonical and canonical not in mapping:
                mapping[canonical] = label
        return mapping

    @staticmethod
    def _to_float(value: object, default: float | None = None) -> float:
        """Convert a single cell to ``float``, treating blanks explicitly.

        pandas represents empty cells as NaN (or ``None``/``pd.NA``), and
        ``float(nan)`` succeeds silently, so a plain ``float()`` call would let
        blank cells through as NaN measurements.

        Args:
            value: Raw cell value taken from a DataFrame row.
            default: Value to return for a blank cell. ``None`` marks the
                cell as required.

        Returns:
            The numeric value, or *default* when the cell is blank.

        Raises:
            ValueError: The cell is blank and no default was given, or the
                text cannot be parsed as a number.
            TypeError: The value is of a type ``float()`` does not accept.
        """
        is_blank = pd.api.types.is_scalar(value) and pd.isna(value)
        if not is_blank:
            number = float(value)
            # A NaN that only appears after conversion (e.g. the text "NaN")
            # is handled like a blank cell.
            if not pd.isna(number):
                return number
        if default is None:
            raise ValueError("missing numeric value")
        return default

    def _extract(
        self, df: pd.DataFrame, col_map: dict[str, str]
    ) -> list[MeasurementRecord]:
        """Build measurement records using the canonical column mapping.

        Falls back to ``_fallback_extract`` unless the feature name, nominal
        and actual columns are all mapped. Rows whose nominal or actual cell
        is blank or non-numeric are skipped. Blank tolerance cells count as
        ``0.0`` and a blank deviation cell is recomputed as
        ``actual - nominal``. Tolerances are normalised so that the plus side
        is non-negative and the minus side is non-positive.
        """
        required = {"feature_name", "nominal", "actual"}
        if not required.issubset(col_map):
            return self._fallback_extract(df)

        # Resolve column labels once instead of on every row.
        name_col = col_map["feature_name"]
        nominal_col = col_map["nominal"]
        actual_col = col_map["actual"]
        tol_plus_col = col_map.get("tolerance_plus")
        tol_minus_col = col_map.get("tolerance_minus")
        deviation_col = col_map.get("deviation")

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = self._to_float(row[nominal_col])
                actual = self._to_float(row[actual_col])
                tol_plus = (
                    self._to_float(row[tol_plus_col], default=0.0)
                    if tol_plus_col is not None
                    else 0.0
                )
                tol_minus = (
                    self._to_float(row[tol_minus_col], default=0.0)
                    if tol_minus_col is not None
                    else 0.0
                )
                computed_deviation = actual - nominal
                deviation = (
                    self._to_float(row[deviation_col], default=computed_deviation)
                    if deviation_col is not None
                    else computed_deviation
                )
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[name_col]),
                        nominal=nominal,
                        tolerance_plus=abs(tol_plus),
                        tolerance_minus=-abs(tol_minus),
                        actual=actual,
                        deviation=deviation,
                    )
                )
            except (ValueError, TypeError):
                # Best-effort parsing: drop rows that cannot be converted.
                continue
        return records

    def _fallback_extract(self, df: pd.DataFrame) -> list[MeasurementRecord]:
        """Best-effort extraction when column mapping is incomplete.

        Treats the first string column as the feature name and the first
        three numeric columns as nominal, actual, tolerance_plus (with
        tolerance_minus mirrored). Returns an empty list when fewer than two
        numeric columns or no string column is available. Rows with a blank
        nominal or actual value are skipped; a blank tolerance counts as 0.0.
        """
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        # NOTE(review): this relies on text columns having object dtype;
        # confirm against pandas versions that infer a dedicated string dtype.
        str_cols = df.select_dtypes(include="object").columns.tolist()
        if len(numeric_cols) < 2 or not str_cols:
            return []

        name_col = str_cols[0]
        nom_col = numeric_cols[0]
        act_col = numeric_cols[1]
        tol_col = numeric_cols[2] if len(numeric_cols) > 2 else None

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = self._to_float(row[nom_col])
                actual = self._to_float(row[act_col])
                # Compare against None: a column label may itself be falsy
                # (for example the integer 0).
                tol = (
                    self._to_float(row[tol_col], default=0.0)
                    if tol_col is not None
                    else 0.0
                )
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[name_col]),
                        nominal=nominal,
                        tolerance_plus=abs(tol),
                        tolerance_minus=-abs(tol),
                        actual=actual,
                        deviation=actual - nominal,
                    )
                )
            except (ValueError, TypeError):
                # Best-effort parsing: drop rows that cannot be converted.
                continue
        return records
|
||||
Reference in New Issue
Block a user