Files
cmm-report-analyzer/app/parsers/base.py
chrisryn 9abf9b4b58 Initial commit: CMM Report Analyzer
FastAPI app that parses CMM inspection reports (PDF/Excel/CSV),
computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk),
generates interactive Plotly charts, and provides AI-powered quality
summaries via Azure OpenAI with graceful fallback.

Includes 21 passing tests covering parsers, SPC calculations, and
API endpoints.
2026-02-19 10:38:51 -06:00

56 lines
1.6 KiB
Python

from __future__ import annotations
import re
from abc import ABC, abstractmethod
from pathlib import Path
from app.parsers.models import ParsedReport
# Canonical field name → fuzzy, case-insensitive regexes for column headers.
# match_column() walks this mapping in insertion order and returns the first
# field with a hit, so earlier entries take precedence over later ones.
COLUMN_PATTERNS: dict[str, list[re.Pattern[str]]] = {
    canonical: [re.compile(expr, re.I) for expr in expressions]
    for canonical, expressions in (
        ("feature_name", (r"feat|char|dimen|label|id|name|item",)),
        ("nominal", (r"nom|target|blueprint|print",)),
        (
            "tolerance_plus",
            (r"tol.*\+|upper.*tol|\+.*tol|usl|dev.*\+|pos.*tol",),
        ),
        (
            "tolerance_minus",
            (r"tol.*-|lower.*tol|-.*tol|lsl|dev.*-|neg.*tol",),
        ),
        ("actual", (r"actual|meas|value|result|reading",)),
        # The negative lookahead keeps "dev ... tol" headers out of this field.
        ("deviation", (r"dev(?!.*tol)|diff|error|delta",)),
    )
}
def match_column(header: str) -> str | None:
    """Return the canonical field name for a header string, or None.

    The fields of ``COLUMN_PATTERNS`` are tried in their declared order and
    the first field with a pattern found anywhere in the header wins.

    Args:
        header: Raw column header text.

    Returns:
        The canonical field name of the first matching entry, or ``None``
        when no pattern matches.
    """
    # Collapse every whitespace run, line breaks included, into one space.
    # "." in the patterns does not match a newline, so a header split across
    # two lines such as "Upper\nTol" would otherwise miss ``upper.*tol``.
    # This also strips leading/trailing whitespace, as before.
    header = " ".join(header.split())
    for field_name, patterns in COLUMN_PATTERNS.items():
        if any(pat.search(header) for pat in patterns):
            return field_name
    return None
class CMMParser(ABC):
    """Abstract base for report parsers; subclasses must implement ``parse``."""

    @abstractmethod
    def parse(self, path: Path) -> ParsedReport:
        """Parse the report file at ``path`` and return a ``ParsedReport``."""
def get_parser(filename: str) -> CMMParser:
    """Return a parser instance chosen by the file extension of ``filename``.

    The extension is compared case-insensitively. ``.pdf`` yields a
    ``PDFParser``; ``.xlsx``, ``.xls`` and ``.csv`` yield an ``ExcelParser``.
    The concrete parser modules are imported only once their branch is taken.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    extension = Path(filename).suffix.lower()
    wants_pdf = extension == ".pdf"
    wants_tabular = extension in (".xlsx", ".xls", ".csv")

    # Reject unknown extensions up front, before any parser module is loaded.
    if not (wants_pdf or wants_tabular):
        raise ValueError(f"Unsupported file type: {extension}")

    if wants_pdf:
        from app.parsers.pdf_parser import PDFParser

        return PDFParser()

    from app.parsers.excel_parser import ExcelParser

    return ExcelParser()