Initial commit: CMM Report Analyzer

FastAPI app that parses CMM inspection reports (PDF/Excel/CSV),
computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk),
generates interactive Plotly charts, and provides AI-powered quality
summaries via Azure OpenAI with graceful fallback.

Includes 21 passing tests covering parsers, SPC calculations, and
API endpoints.
This commit is contained in:
chrisryn
2026-02-19 10:38:51 -06:00
commit 9abf9b4b58
28 changed files with 1727 additions and 0 deletions

0
tests/__init__.py Normal file
View File

72
tests/test_api.py Normal file
View File

@@ -0,0 +1,72 @@
import io
import tempfile
from pathlib import Path
import pandas as pd
import pytest
from httpx import ASGITransport, AsyncClient
from app.main import app
@pytest.fixture
def sample_excel() -> bytes:
    """Return the raw bytes of an in-memory Excel workbook.

    The sheet contains three rows each for features "D1" and "D2" under the
    headers "Feature Name", "Nominal", "Actual", "Tol+" and "Tol-".
    """
    columns = {
        "Feature Name": ["D1", "D1", "D1", "D2", "D2", "D2"],
        "Nominal": [10.0, 10.0, 10.0, 20.0, 20.0, 20.0],
        "Actual": [10.02, 9.99, 10.01, 20.05, 19.97, 20.02],
        "Tol+": [0.05, 0.05, 0.05, 0.10, 0.10, 0.10],
        "Tol-": [-0.05, -0.05, -0.05, -0.10, -0.10, -0.10],
    }
    workbook = io.BytesIO()
    pd.DataFrame(columns).to_excel(workbook, index=False)
    # getvalue() yields the full buffer regardless of the stream position.
    return workbook.getvalue()
@pytest.mark.asyncio
async def test_upload_and_results(sample_excel):
    """Upload one workbook, then fetch the batch and check the reported result."""
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    upload_part = ("files", ("test.xlsx", sample_excel, xlsx_mime))

    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://test"
    ) as client:
        upload_resp = await client.post("/api/upload", files=[upload_part])
        assert upload_resp.status_code == 200
        data = upload_resp.json()
        assert "batch_id" in data

        # The batch id from the upload response addresses the results endpoint.
        results_resp = await client.get(f"/api/results/{data['batch_id']}")
        assert results_resp.status_code == 200
        result = results_resp.json()
        assert result["status"] == "complete"

        files = result["files"]
        assert len(files) == 1
        assert files[0]["filename"] == "test.xlsx"
        assert len(files[0]["spc"]) == 2  # D1 and D2
@pytest.mark.asyncio
async def test_upload_no_files():
    """An upload request carrying no files is answered with 400 or 422."""
    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://test"
    ) as client:
        response = await client.post("/api/upload", files=[])
        assert response.status_code in (400, 422)
@pytest.mark.asyncio
async def test_upload_unsupported_type():
    """Uploading a .png file is answered with HTTP 400."""
    png_part = ("files", ("test.png", b"fake", "image/png"))
    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://test"
    ) as client:
        response = await client.post("/api/upload", files=[png_part])
        assert response.status_code == 400
@pytest.mark.asyncio
async def test_results_not_found():
    """Requesting results for the batch id "nonexistent" yields HTTP 404."""
    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://test"
    ) as client:
        response = await client.get("/api/results/nonexistent")
        assert response.status_code == 404

90
tests/test_parsers.py Normal file
View File

@@ -0,0 +1,90 @@
import tempfile
from pathlib import Path
import pandas as pd
from app.parsers.base import get_parser, match_column
from app.parsers.excel_parser import ExcelParser
from app.parsers.models import MeasurementRecord
def test_match_column_nominal():
    """Headers "Nominal", "NOM" and "Target Value" all map to "nominal"."""
    for header in ("Nominal", "NOM", "Target Value"):
        assert match_column(header) == "nominal"
def test_match_column_actual():
    """Headers "Actual" and "Measured" both map to "actual"."""
    for header in ("Actual", "Measured"):
        assert match_column(header) == "actual"
def test_match_column_unknown():
    """An unrecognised header gives None."""
    matched = match_column("random_xyz")
    assert matched is None
def test_get_parser_pdf():
    """A ".pdf" filename is dispatched to PDFParser."""
    # PDFParser is imported locally, as in the rest of this test module's usage.
    from app.parsers.pdf_parser import PDFParser

    assert isinstance(get_parser("report.pdf"), PDFParser)
def test_get_parser_excel():
    """A ".xlsx" filename is dispatched to ExcelParser."""
    assert isinstance(get_parser("data.xlsx"), ExcelParser)
def test_get_parser_csv():
    """A ".csv" filename is also dispatched to ExcelParser."""
    assert isinstance(get_parser("data.csv"), ExcelParser)
def test_get_parser_unsupported():
    """get_parser must raise ValueError for an unsupported ".png" filename."""
    try:
        get_parser("image.png")
    except ValueError:
        return  # expected outcome
    # Raise explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would let a missing exception pass.
    raise AssertionError("Should have raised")
def test_measurement_record_properties():
    """Check usl, lsl and in_tolerance for a 10.02 reading on 10.0 +0.05/-0.05."""
    fields = {
        "feature_name": "D1",
        "nominal": 10.0,
        "tolerance_plus": 0.05,
        "tolerance_minus": -0.05,
        "actual": 10.02,
    }
    rec = MeasurementRecord(**fields)

    assert rec.usl == 10.05
    assert rec.lsl == 9.95
    assert rec.in_tolerance is True
def test_measurement_record_out_of_tolerance():
    """A 10.10 reading on 10.0 +0.05/-0.05 is reported as out of tolerance."""
    fields = {
        "feature_name": "D1",
        "nominal": 10.0,
        "tolerance_plus": 0.05,
        "tolerance_minus": -0.05,
        "actual": 10.10,
    }
    rec = MeasurementRecord(**fields)

    assert rec.in_tolerance is False
def test_excel_parser_with_standard_headers():
    """Parse a three-row workbook with standard headers and check the records.

    The workbook is written to a temporary directory, parsed with
    ExcelParser, and the first measurement's name, nominal and actual are
    compared with the values that were written.
    """
    df = pd.DataFrame({
        "Feature Name": ["D1", "D2", "D3"],
        "Nominal": [10.0, 20.0, 30.0],
        "Actual": [10.02, 19.98, 30.05],
        "Tol+": [0.05, 0.10, 0.10],
        "Tol-": [-0.05, -0.10, -0.10],
    })
    # A TemporaryDirectory removes the workbook when the block exits.
    # NamedTemporaryFile(delete=False) would leave a stray .xlsx behind on
    # every run, and writing by name to a still-open handle fails on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        workbook_path = Path(tmp_dir) / "report.xlsx"
        df.to_excel(workbook_path, index=False)
        parser = ExcelParser()
        report = parser.parse(workbook_path)

    assert len(report.measurements) == 3
    first = report.measurements[0]
    assert first.feature_name == "D1"
    assert first.nominal == 10.0
    assert first.actual == 10.02

84
tests/test_spc.py Normal file
View File

@@ -0,0 +1,84 @@
from app.analysis.spc import SPCResult, calculate_spc
from app.parsers.models import MeasurementRecord
def _make_records(name: str, nominal: float, tol: float, actuals: list[float]):
    """Build one MeasurementRecord per value in *actuals* for a single feature.

    Every record shares the feature name, the nominal and a symmetric
    tolerance (+tol / -tol); its deviation is set to ``actual - nominal``.
    """
    shared = {
        "feature_name": name,
        "nominal": nominal,
        "tolerance_plus": tol,
        "tolerance_minus": -tol,
    }
    records = []
    for value in actuals:
        records.append(
            MeasurementRecord(actual=value, deviation=value - nominal, **shared)
        )
    return records
def test_single_measurement_returns_none_indices():
    """With a single measurement, cp, cpk, pp and ppk are all None."""
    results = calculate_spc(_make_records("D1", 10.0, 0.05, [10.01]))
    assert len(results) == 1

    only = results[0]
    for index_name in ("cp", "cpk", "pp", "ppk"):
        assert getattr(only, index_name) is None
def test_basic_spc_calculation():
    """Ten in-spec readings give populated indices and no out-of-spec count."""
    readings = [10.01, 10.02, 9.99, 10.00, 10.03, 9.98, 10.01, 10.02, 9.99, 10.00]
    results = calculate_spc(_make_records("D1", 10.0, 0.05, readings))
    assert len(results) == 1

    spc = results[0]
    assert spc.n == 10
    # All four capability/performance indices must be populated.
    for index_name in ("pp", "ppk", "cp", "cpk"):
        assert getattr(spc, index_name) is not None
    assert spc.pp > 0
    assert spc.cpk > 0
    assert spc.out_of_spec_count == 0
def test_out_of_spec_count():
    """Two of five readings fall outside the band and are counted."""
    # 10.06 and 9.94 outside ±0.05
    readings = [10.0, 10.06, 9.94, 10.0, 10.0]
    results = calculate_spc(_make_records("D1", 10.0, 0.05, readings))
    first = results[0]
    assert first.out_of_spec_count == 2
def test_multiple_features():
    """Records for two features produce two results named "D1" and "D2"."""
    d1_records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99])
    d2_records = _make_records("D2", 20.0, 0.10, [20.05, 19.95, 20.01])

    results = calculate_spc(d1_records + d2_records)

    assert len(results) == 2
    feature_names = set()
    for res in results:
        feature_names.add(res.feature_name)
    assert feature_names == {"D1", "D2"}
def test_shapiro_not_computed_for_small_n():
    """With only two readings, shapiro_p is None."""
    two_readings = _make_records("D1", 10.0, 0.05, [10.01, 10.02])
    first = calculate_spc(two_readings)[0]
    assert first.shapiro_p is None
def test_shapiro_computed_for_n_ge_3():
    """With three readings, shapiro_p is populated."""
    three_readings = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99])
    first = calculate_spc(three_readings)[0]
    assert first.shapiro_p is not None
def test_to_dict():
    """to_dict() exposes "feature_name", "cpk" and a list under "values"."""
    readings = [10.01, 10.02, 9.99, 10.00, 10.03]
    results = calculate_spc(_make_records("D1", 10.0, 0.05, readings))
    as_dict = results[0].to_dict()

    for key in ("feature_name", "cpk", "values"):
        assert key in as_dict
    assert isinstance(as_dict["values"], list)