FastAPI app that parses CMM inspection reports (PDF/Excel/CSV), computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk), generates interactive Plotly charts, and provides AI-powered quality summaries via Azure OpenAI with graceful fallback. Includes 21 passing tests covering parsers, SPC calculations, and API endpoints.
113 lines
4.1 KiB
Python
113 lines
4.1 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
|
|
from app.parsers.base import CMMParser, match_column
|
|
from app.parsers.models import MeasurementRecord, ParsedReport
|
|
|
|
|
|
class ExcelParser(CMMParser):
    """Parse tabular CMM reports stored as Excel workbooks or CSV files.

    Columns are located by header name through ``match_column``; when the
    required headers cannot be identified, a positional best-effort
    extraction is used instead (see ``_fallback_extract``).
    """

    def parse(self, path: Path) -> ParsedReport:
        """Load *path* into a DataFrame and convert its rows to measurements.

        A ``.csv`` suffix (case-insensitive) selects ``pandas.read_csv``;
        any other suffix is read with ``pandas.read_excel`` using the
        ``openpyxl`` engine.

        Returns:
            A ``ParsedReport`` holding the file name, the extracted
            measurements, a metadata dict (``source`` tag and the row count
            as a string) and a text dump of the table capped at 200 rows.
        """
        if path.suffix.lower() == ".csv":
            df = pd.read_csv(path)
        else:
            df = pd.read_excel(path, engine="openpyxl")

        col_map = self._map_columns(df.columns.tolist())
        measurements = self._extract(df, col_map)
        return ParsedReport(
            filename=path.name,
            measurements=measurements,
            metadata={"source": "excel", "rows": str(len(df))},
            raw_text=df.to_string(max_rows=200),
        )

    def _map_columns(self, headers: list[str]) -> dict[str, str]:
        """Map canonical field names to actual DataFrame column names.

        Each header is passed (as a string) to ``match_column``. When
        several headers resolve to the same canonical name, the first one
        encountered wins.
        """
        mapping: dict[str, str] = {}
        for header in headers:
            canonical = match_column(str(header))
            if canonical and canonical not in mapping:
                mapping[canonical] = str(header)
        return mapping

    @staticmethod
    def _required_float(value: object) -> float:
        """Convert a mandatory cell to ``float``; blank cells raise ``ValueError``.

        ``float(nan)`` succeeds silently, so blank cells must be rejected
        explicitly or they would end up as NaN measurements.
        """
        if pd.isna(value):
            raise ValueError("missing required numeric value")
        return float(value)

    @staticmethod
    def _optional_float(value: object, default: float) -> float:
        """Convert an optional cell to ``float``, using *default* when blank."""
        if pd.isna(value):
            return default
        return float(value)

    def _extract(
        self, df: pd.DataFrame, col_map: dict[str, str]
    ) -> list[MeasurementRecord]:
        """Build one ``MeasurementRecord`` per usable row of *df*.

        ``feature_name``, ``nominal`` and ``actual`` must be present in
        *col_map*; otherwise the work is delegated to ``_fallback_extract``.
        Tolerances default to ``0.0`` and the deviation to
        ``actual - nominal`` when their column is absent or the cell is
        blank. Rows whose nominal/actual cell is blank or not convertible to
        a number are skipped.
        """
        required = {"feature_name", "nominal", "actual"}
        if not required.issubset(col_map):
            return self._fallback_extract(df)

        # Resolve the column labels once instead of on every row.
        name_col = col_map["feature_name"]
        nominal_col = col_map["nominal"]
        actual_col = col_map["actual"]
        tol_plus_col = col_map.get("tolerance_plus")
        tol_minus_col = col_map.get("tolerance_minus")
        deviation_col = col_map.get("deviation")

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = self._required_float(row[nominal_col])
                actual = self._required_float(row[actual_col])
                tol_plus = (
                    self._optional_float(row[tol_plus_col], 0.0)
                    if tol_plus_col is not None
                    else 0.0
                )
                tol_minus = (
                    self._optional_float(row[tol_minus_col], 0.0)
                    if tol_minus_col is not None
                    else 0.0
                )
                computed_deviation = actual - nominal
                deviation = (
                    self._optional_float(row[deviation_col], computed_deviation)
                    if deviation_col is not None
                    else computed_deviation
                )
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[name_col]),
                        nominal=nominal,
                        # Normalise signs: plus is stored >= 0, minus <= 0.
                        tolerance_plus=abs(tol_plus),
                        tolerance_minus=-abs(tol_minus),
                        actual=actual,
                        deviation=deviation,
                    )
                )
            except (ValueError, TypeError):
                # Deliberate best effort: header repeats, notes and blank
                # rows are dropped rather than failing the whole report.
                continue
        return records

    def _fallback_extract(self, df: pd.DataFrame) -> list[MeasurementRecord]:
        """Best-effort extraction when column mapping is incomplete.

        Treats the first string column as the feature name and the first
        three numeric columns as nominal, actual, tolerance_plus (with
        tolerance_minus mirrored). Returns an empty list when fewer than two
        numeric columns or no string column exist. Rows with a blank
        nominal/actual cell are skipped; a blank tolerance counts as ``0.0``.
        """
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        str_cols = df.select_dtypes(include="object").columns.tolist()
        if len(numeric_cols) < 2 or not str_cols:
            return []

        name_col = str_cols[0]
        nom_col = numeric_cols[0]
        act_col = numeric_cols[1]
        tol_col = numeric_cols[2] if len(numeric_cols) > 2 else None

        records: list[MeasurementRecord] = []
        for _, row in df.iterrows():
            try:
                nominal = self._required_float(row[nom_col])
                actual = self._required_float(row[act_col])
                # Compare against None: a column label may itself be falsy
                # (e.g. 0 or ""), which must not disable the tolerance.
                tol = (
                    self._optional_float(row[tol_col], 0.0)
                    if tol_col is not None
                    else 0.0
                )
                records.append(
                    MeasurementRecord(
                        feature_name=str(row[name_col]),
                        nominal=nominal,
                        tolerance_plus=abs(tol),
                        tolerance_minus=-abs(tol),
                        actual=actual,
                        deviation=actual - nominal,
                    )
                )
            except (ValueError, TypeError):
                continue
        return records
|