Initial commit: CMM Report Analyzer

FastAPI app that parses CMM inspection reports (PDF/Excel/CSV), computes SPC metrics (Cp/Cpk/Pp/Ppk, control limits, Shapiro-Wilk), generates interactive Plotly charts, and provides AI-powered quality summaries via Azure OpenAI with graceful fallback. Includes 21 passing tests covering parsers, SPC calculations, and API endpoints.
2026-02-19 10:38:51 -06:00
commit 9abf9b4b58
28 changed files with 1727 additions and 0 deletions
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -0,0 +1,72 @@
+import io
+import tempfile
+from pathlib import Path
+
+import pandas as pd
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from app.main import app
+
+
+@pytest.fixture
+def sample_excel() -> bytes:
+    """Return the bytes of an Excel workbook holding three rows each for D1 and D2."""
+    frame = pd.DataFrame({
+        "Feature Name": ["D1"] * 3 + ["D2"] * 3,
+        "Nominal": [10.0] * 3 + [20.0] * 3,
+        "Actual": [10.02, 9.99, 10.01, 20.05, 19.97, 20.02],
+        "Tol+": [0.05] * 3 + [0.10] * 3,
+        "Tol-": [-0.05] * 3 + [-0.10] * 3,
+    })
+    workbook = io.BytesIO()
+    frame.to_excel(workbook, index=False)
+    return workbook.getvalue()
+
+
+@pytest.mark.asyncio
+async def test_upload_and_results(sample_excel):
+    """Upload one workbook, then fetch its batch and check the reported results."""
+    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+        upload = await client.post(
+            "/api/upload",
+            files=[("files", ("test.xlsx", sample_excel, xlsx_mime))],
+        )
+        assert upload.status_code == 200
+        payload = upload.json()
+        assert "batch_id" in payload
+        fetched = await client.get(f"/api/results/{payload['batch_id']}")
+        assert fetched.status_code == 200
+        batch = fetched.json()
+        assert batch["status"] == "complete"
+        assert len(batch["files"]) == 1
+        assert batch["files"][0]["filename"] == "test.xlsx"
+        assert len(batch["files"][0]["spc"]) == 2  # D1 and D2
+
+
+@pytest.mark.asyncio
+async def test_upload_no_files():
+    """Posting to /api/upload with an empty file list must answer 400 or 422."""
+    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+        outcome = await client.post("/api/upload", files=[])
+        assert outcome.status_code in (400, 422)
+
+
+@pytest.mark.asyncio
+async def test_upload_unsupported_type():
+    """Uploading a .png file must be refused with HTTP 400."""
+    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+        outcome = await client.post(
+            "/api/upload",
+            files=[("files", ("test.png", b"fake", "image/png"))],
+        )
+        assert outcome.status_code == 400
+
+
+@pytest.mark.asyncio
+async def test_results_not_found():
+    """Requesting results for an unknown batch id must answer 404."""
+    async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+        outcome = await client.get("/api/results/nonexistent")
+        assert outcome.status_code == 404
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -0,0 +1,90 @@
+import tempfile
+from pathlib import Path
+
+import pandas as pd
+
+from app.parsers.base import get_parser, match_column
+from app.parsers.excel_parser import ExcelParser
+from app.parsers.models import MeasurementRecord
+
+
+def test_match_column_nominal():
+    """Each tested spelling of the nominal header maps to 'nominal'."""
+    for header in ("Nominal", "NOM", "Target Value"):
+        assert match_column(header) == "nominal"
+
+
+def test_match_column_actual():
+    """Both 'Actual' and 'Measured' headers map to 'actual'."""
+    assert {match_column(header) for header in ("Actual", "Measured")} == {"actual"}
+
+
+def test_match_column_unknown():  # a header that matches no known column yields None
+    assert match_column("random_xyz") is None
+
+
+def test_get_parser_pdf():
+    """A .pdf filename must be handled by a PDFParser instance."""
+    from app.parsers.pdf_parser import PDFParser
+    assert isinstance(get_parser("report.pdf"), PDFParser)
+
+
+def test_get_parser_excel():
+    """A .xlsx filename must be handled by an ExcelParser instance."""
+    assert isinstance(get_parser("data.xlsx"), ExcelParser)
+
+
+def test_get_parser_csv():
+    """A .csv filename must also be handled by an ExcelParser instance."""
+    assert isinstance(get_parser("data.csv"), ExcelParser)
+
+
+def test_get_parser_unsupported():  # get_parser must raise ValueError for "image.png"
+    try:
+        get_parser("image.png")
+    except ValueError:
+        return  # the expected rejection
+    assert False, "Should have raised"
+
+
+def test_measurement_record_properties():
+    """Nominal 10.0 with +0.05/-0.05 gives usl 10.05, lsl 9.95; 10.02 is in tolerance."""
+    record = MeasurementRecord(
+        feature_name="D1",
+        actual=10.02,
+        nominal=10.0,
+        tolerance_minus=-0.05,
+        tolerance_plus=0.05,
+    )
+    assert (record.usl, record.lsl) == (10.05, 9.95)
+    assert record.in_tolerance is True
+
+
+def test_measurement_record_out_of_tolerance():
+    """An actual of 10.10 against nominal 10.0 +0.05/-0.05 is not in tolerance."""
+    record = MeasurementRecord(
+        feature_name="D1",
+        actual=10.10,
+        nominal=10.0,
+        tolerance_plus=0.05, tolerance_minus=-0.05,
+    )
+    assert record.in_tolerance is False
+
+
+def test_excel_parser_with_standard_headers():
+    """ExcelParser reads a three-row workbook with standard headers into three records."""
+    df = pd.DataFrame({
+        "Feature Name": ["D1", "D2", "D3"],
+        "Nominal": [10.0, 20.0, 30.0],
+        "Actual": [10.02, 19.98, 30.05],
+        "Tol+": [0.05, 0.10, 0.10],
+        "Tol-": [-0.05, -0.10, -0.10],
+    })
+    with tempfile.TemporaryDirectory() as tmp_dir:  # directory and workbook are deleted on exit
+        workbook = Path(tmp_dir) / "report.xlsx"
+        df.to_excel(workbook, index=False)  # written by path; no other handle is open on it
+        report = ExcelParser().parse(workbook)
+    assert len(report.measurements) == 3
+    assert report.measurements[0].feature_name == "D1"
+    assert report.measurements[0].nominal == 10.0
+    assert report.measurements[0].actual == 10.02
--- a/tests/test_spc.py
+++ b/tests/test_spc.py
@@ -0,0 +1,84 @@
+from app.analysis.spc import SPCResult, calculate_spc
+from app.parsers.models import MeasurementRecord
+
+
+def _make_records(name: str, nominal: float, tol: float, actuals: list[float]):
+    """Build one MeasurementRecord per actual value, with tolerances of +tol and -tol."""
+    shared = {
+        "feature_name": name,
+        "nominal": nominal,
+        "tolerance_plus": tol,
+        "tolerance_minus": -tol,
+    }
+    return [
+        MeasurementRecord(actual=value, deviation=value - nominal, **shared)
+        for value in actuals
+    ]
+
+
+def test_single_measurement_returns_none_indices():
+    """With a single reading, cp, cpk, pp and ppk must all be None."""
+    records = _make_records("D1", 10.0, 0.05, [10.01])
+    results = calculate_spc(records)
+    assert len(results) == 1
+
+    only = results[0]
+    for index_name in ("cp", "cpk", "pp", "ppk"):
+        assert getattr(only, index_name) is None
+
+
+def test_basic_spc_calculation():
+    """Ten readings for one feature give n == 10, all four indices, and zero out of spec."""
+    readings = [10.01, 10.02, 9.99, 10.00, 10.03, 9.98, 10.01, 10.02, 9.99, 10.00]
+    records = _make_records("D1", 10.0, 0.05, readings)
+    results = calculate_spc(records)
+
+    assert len(results) == 1
+    stats = results[0]
+    assert stats.n == 10
+    for index_name in ("pp", "ppk", "cp", "cpk"):
+        assert getattr(stats, index_name) is not None
+
+    assert stats.pp > 0
+    assert stats.cpk > 0
+    assert stats.out_of_spec_count == 0
+
+
+def test_out_of_spec_count():
+    """Two of the five readings fall outside the tolerance band and must be counted."""
+    readings = [10.0, 10.06, 9.94, 10.0, 10.0]  # 10.06 and 9.94 outside ±0.05
+    results = calculate_spc(_make_records("D1", 10.0, 0.05, readings))
+    assert results[0].out_of_spec_count == 2
+
+
+def test_multiple_features():
+    """Records for D1 and D2 passed together produce one result per feature name."""
+    d1_records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99])
+    d2_records = _make_records("D2", 20.0, 0.10, [20.05, 19.95, 20.01])
+    results = calculate_spc(d1_records + d2_records)
+
+    assert len(results) == 2
+    seen = {result.feature_name for result in results}
+    assert seen == {"D1", "D2"}
+
+
+def test_shapiro_not_computed_for_small_n():
+    """With only two readings, shapiro_p must be None."""
+    results = calculate_spc(_make_records("D1", 10.0, 0.05, [10.01, 10.02]))
+    assert results[0].shapiro_p is None
+
+
+def test_shapiro_computed_for_n_ge_3():
+    """With three readings, shapiro_p must be populated."""
+    results = calculate_spc(_make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99]))
+    assert results[0].shapiro_p is not None
+
+
+def test_to_dict():
+    """to_dict() must expose feature_name, cpk and values, with values as a list."""
+    records = _make_records("D1", 10.0, 0.05, [10.01, 10.02, 9.99, 10.00, 10.03])
+    results = calculate_spc(records)
+    exported = results[0].to_dict()
+    for key in ("feature_name", "cpk", "values"):
+        assert key in exported
+    assert isinstance(exported["values"], list)