Initial commit: Homelab Dashboard with YAML configuration
Features:
- Service health monitoring with response times
- Proxmox cluster integration (nodes, VMs, containers)
- PBS backup server monitoring
- Camera viewer with WebRTC (go2rtc)
- Docker container monitoring
- Uptime Kuma integration
- Mobile-friendly responsive design
- YAML-based configuration for easy setup
app/services/__init__.py (new file, 0 lines)
app/services/health.py (new file, 391 lines)
@@ -0,0 +1,391 @@
"""Enhanced services module v2 with PBS, VM/LXC, storage pools, events."""

import asyncio
from typing import Dict, Any, Optional, List
import httpx
from dataclasses import dataclass, field
from datetime import datetime
from collections import deque

@dataclass
class HealthStatus:
    name: str
    status: str
    response_time_ms: Optional[float] = None
    error: Optional[str] = None

@dataclass
class NodeStatus:
    name: str
    ip: str
    status: str
    cpu_percent: Optional[float] = None
    memory_percent: Optional[float] = None
    memory_used_gb: Optional[float] = None
    memory_total_gb: Optional[float] = None
    disk_percent: Optional[float] = None
    uptime_hours: Optional[float] = None
    vms: List[Dict] = field(default_factory=list)
    containers: List[Dict] = field(default_factory=list)

@dataclass
class DockerContainer:
    name: str
    status: str
    state: str
    image: str
    host: str

@dataclass
class UptimeMonitor:
    id: int
    name: str
    status: int
    ping: Optional[int] = None
    heartbeats: Optional[List[Dict]] = None

@dataclass
class PBSStatus:
    status: str
    datastore_usage: List[Dict] = field(default_factory=list)
    last_backup: Optional[str] = None
    total_size_gb: float = 0
    used_size_gb: float = 0

@dataclass
class StoragePool:
    name: str
    node: str
    total_gb: float
    used_gb: float
    avail_gb: float
    percent_used: float
    pool_type: str

@dataclass
class StatusEvent:
    timestamp: datetime
    service: str
    old_status: str
    new_status: str

# Recent events storage (in-memory, last 20)
recent_events: deque = deque(maxlen=20)
last_status_cache: Dict[str, str] = {}

# Per-service overrides: (check_url, timeout_seconds) for services that can't
# be probed at the default scheme://ip:port derived from the config.
SERVICE_CHECK_OVERRIDES = {
    "OPNsense": ("https://192.168.1.1:8443/", 10.0),
    "Vaultwarden": ("https://vault.deathstar-home.one/", 5.0),
    "Immich": ("http://192.168.1.54:2283/", 5.0),
}

async def check_service(client: httpx.AsyncClient, service) -> HealthStatus:
    """Check if a service is reachable."""
    global last_status_cache, recent_events

    if service.name in SERVICE_CHECK_OVERRIDES:
        check_url, timeout = SERVICE_CHECK_OVERRIDES[service.name]
    else:
        # Guess the scheme: these ports (Proxmox, PBS, etc.) are HTTPS-only.
        https_ports = [443, 8006, 8007, 8443, 9443]
        scheme = "https" if service.port in https_ports else "http"
        check_url = f"{scheme}://{service.ip}:{service.port}/"
        timeout = 5.0

    start = asyncio.get_running_loop().time()
    try:
        response = await client.get(check_url, timeout=timeout, follow_redirects=True)
        elapsed = (asyncio.get_running_loop().time() - start) * 1000
        new_status = "online" if response.status_code < 500 else "degraded"
        result = HealthStatus(name=service.name, status=new_status, response_time_ms=round(elapsed, 1))
    except Exception as exc:
        new_status = "offline"
        result = HealthStatus(name=service.name, status="offline", error=str(exc))

    # Track status changes
    old_status = last_status_cache.get(service.name)
    if old_status and old_status != new_status:
        recent_events.append(StatusEvent(
            timestamp=datetime.now(),
            service=service.name,
            old_status=old_status,
            new_status=new_status,
        ))
    last_status_cache[service.name] = new_status

    return result

async def check_all_services(services) -> Dict[str, HealthStatus]:
    """Check all services concurrently."""
    async with httpx.AsyncClient(verify=False, timeout=10.0) as client:
        tasks = [check_service(client, s) for s in services]
        results = await asyncio.gather(*tasks)
        return {r.name: r for r in results}
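
# `services` only needs objects exposing .name, .ip and .port (the entries
# parsed from the YAML config). Failures never raise here; they surface as
# HealthStatus(status="offline") entries in the returned dict.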

async def get_proxmox_node_metrics(client: httpx.AsyncClient, node: Dict, token: str, secret: str) -> NodeStatus:
    """Get Proxmox node metrics including VMs and containers."""
    base_url = f"https://{node['ip']}:{node['port']}/api2/json"
    headers = {"Authorization": f"PVEAPIToken={token}={secret}"}

    result = NodeStatus(name=node["name"], ip=node["ip"], status="offline")

    try:
        # Get node status
        response = await client.get(f"{base_url}/nodes/{node['name']}/status", headers=headers, timeout=5.0)
        if response.status_code == 200:
            data = response.json()["data"]
            cpu = data.get("cpu", 0) * 100
            mem_used = data.get("memory", {}).get("used", 0)
            mem_total = data.get("memory", {}).get("total", 1)
            mem_pct = (mem_used / mem_total) * 100 if mem_total else 0
            disk_used = data.get("rootfs", {}).get("used", 0)
            disk_total = data.get("rootfs", {}).get("total", 1)
            disk_pct = (disk_used / disk_total) * 100 if disk_total else 0
            uptime_sec = data.get("uptime", 0)

            result.status = "online"
            result.cpu_percent = round(cpu, 1)
            result.memory_percent = round(mem_pct, 1)
            result.memory_used_gb = round(mem_used / (1024**3), 1)
            result.memory_total_gb = round(mem_total / (1024**3), 1)
            result.disk_percent = round(disk_pct, 1)
            result.uptime_hours = round(uptime_sec / 3600, 1)

        # Get VMs
        vm_response = await client.get(f"{base_url}/nodes/{node['name']}/qemu", headers=headers, timeout=5.0)
        if vm_response.status_code == 200:
            for vm in vm_response.json().get("data", []):
                result.vms.append({
                    "vmid": vm.get("vmid"),
                    "name": vm.get("name", f"VM {vm.get('vmid')}"),
                    "status": vm.get("status"),
                    "mem": round(vm.get("mem", 0) / (1024**3), 1) if vm.get("mem") else 0,
                    "cpu": round(vm.get("cpu", 0) * 100, 1) if vm.get("cpu") else 0,
                })

        # Get containers
        ct_response = await client.get(f"{base_url}/nodes/{node['name']}/lxc", headers=headers, timeout=5.0)
        if ct_response.status_code == 200:
            for ct in ct_response.json().get("data", []):
                result.containers.append({
                    "vmid": ct.get("vmid"),
                    "name": ct.get("name", f"CT {ct.get('vmid')}"),
                    "status": ct.get("status"),
                    "mem": round(ct.get("mem", 0) / (1024**3), 1) if ct.get("mem") else 0,
                    "cpu": round(ct.get("cpu", 0) * 100, 1) if ct.get("cpu") else 0,
                })
    except Exception:
        pass

    return result
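
# `node` is a mapping from the YAML config, e.g. {"name": "pve1",
# "ip": "192.168.1.10", "port": 8006} (values illustrative). The header uses
# the Proxmox VE API token scheme, PVEAPIToken=USER@REALM!TOKENID=SECRET, so
# `token` must already contain the user@realm!tokenid part.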

async def get_all_proxmox_metrics(nodes, token: str, secret: str) -> List[NodeStatus]:
    """Get metrics for all Proxmox nodes."""
    async with httpx.AsyncClient(verify=False) as client:
        tasks = [get_proxmox_node_metrics(client, n, token, secret) for n in nodes]
        return await asyncio.gather(*tasks)

async def get_pbs_status(url: str, token: str, secret: str) -> PBSStatus:
    """Get PBS backup server status."""
    result = PBSStatus(status="offline")
    headers = {"Authorization": f"PBSAPIToken={token}:{secret}"}

    try:
        async with httpx.AsyncClient(verify=False, timeout=10.0) as client:
            # Get datastore status
            ds_response = await client.get(f"{url}/api2/json/status/datastore-usage", headers=headers)
            if ds_response.status_code == 200:
                result.status = "online"
                for ds in ds_response.json().get("data", []):
                    total = ds.get("total", 0)
                    used = ds.get("used", 0)
                    result.datastore_usage.append({
                        "name": ds.get("store"),
                        "total_gb": round(total / (1024**3), 1),
                        "used_gb": round(used / (1024**3), 1),
                        "percent": round((used / total) * 100, 1) if total else 0,
                    })
                    result.total_size_gb += total / (1024**3)
                    result.used_size_gb += used / (1024**3)

            # Try to get last backup task
            tasks_response = await client.get(f"{url}/api2/json/nodes/localhost/tasks", headers=headers)
            if tasks_response.status_code == 200:
                tasks = tasks_response.json().get("data", [])
                backup_tasks = [t for t in tasks if t.get("type") == "backup"]
                if backup_tasks:
                    # Assumes the task list is ordered newest-first
                    last = backup_tasks[0]
                    result.last_backup = datetime.fromtimestamp(last.get("starttime", 0)).strftime("%Y-%m-%d %H:%M")
    except Exception:
        pass

    return result
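
# Note the PBS token header differs from PVE's: PBSAPIToken=USER@REALM!TOKENID
# followed by a colon and the secret, rather than an equals sign.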

async def get_storage_pools(nodes, token: str, secret: str) -> List[StoragePool]:
    """Get storage pool info from all Proxmox nodes."""
    pools = []
    headers = {"Authorization": f"PVEAPIToken={token}={secret}"}

    async with httpx.AsyncClient(verify=False, timeout=10.0) as client:
        for node in nodes:
            try:
                url = f"https://{node['ip']}:{node['port']}/api2/json/nodes/{node['name']}/storage"
                response = await client.get(url, headers=headers)
                if response.status_code == 200:
                    for storage in response.json().get("data", []):
                        if storage.get("enabled") and storage.get("total"):
                            total = storage.get("total", 0)
                            used = storage.get("used", 0)
                            avail = storage.get("avail", 0)
                            pools.append(StoragePool(
                                name=storage.get("storage"),
                                node=node["name"],
                                total_gb=round(total / (1024**3), 1),
                                used_gb=round(used / (1024**3), 1),
                                avail_gb=round(avail / (1024**3), 1),
                                percent_used=round((used / total) * 100, 1) if total else 0,
                                pool_type=storage.get("type", "unknown"),
                            ))
            except Exception:
                pass

    return pools
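
# Because this queries each node's /storage endpoint, cluster-wide storages
# (NFS, CephFS, ...) will show up once per node in the returned list.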

async def get_docker_containers(hosts: List[Dict]) -> List[DockerContainer]:
    """Get Docker containers via docker-socket-proxy."""
    containers = []
    async with httpx.AsyncClient(timeout=5.0) as client:
        for host in hosts:
            try:
                url = f"http://{host['ip']}:{host['port']}/containers/json?all=true"
                response = await client.get(url)
                if response.status_code == 200:
                    for c in response.json():
                        name = c.get("Names", ["/unknown"])[0].lstrip("/")
                        # Don't list the proxy that grants us this API access
                        if name == "docker-socket-proxy":
                            continue
                        containers.append(DockerContainer(
                            name=name,
                            status=c.get("Status", ""),
                            state=c.get("State", "unknown"),
                            image=c.get("Image", "").split("/")[-1].split(":")[0],
                            host=host["name"],
                        ))
            except Exception:
                pass
    return containers
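
# Each host entry is assumed to expose the Docker Engine API through a socket
# proxy (e.g. Tecnativa's docker-socket-proxy with CONTAINERS=1), so no TLS or
# auth is involved: {"name": "nas", "ip": "192.168.1.20", "port": 2375}
# (values illustrative).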

async def get_docker_container_counts(hosts: List[Dict]) -> Dict[str, int]:
    """Get container counts per host."""
    counts = {}
    async with httpx.AsyncClient(timeout=5.0) as client:
        for host in hosts:
            try:
                url = f"http://{host['ip']}:{host['port']}/containers/json"
                response = await client.get(url)
                if response.status_code == 200:
                    # Exclude docker-socket-proxy itself from the count
                    count = len([c for c in response.json() if "docker-socket-proxy" not in c.get("Names", [""])[0]])
                    counts[host["name"]] = count
            except Exception:
                counts[host["name"]] = 0
    return counts

async def get_uptime_kuma_status(url: str, status_page: str = "uptime") -> Dict:
    """Get Uptime Kuma status."""
    result = {"monitors": [], "summary": {"up": 0, "down": 0, "total": 0}}
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            hb_response = await client.get(f"{url}/api/status-page/heartbeat/{status_page}")
            info_response = await client.get(f"{url}/api/status-page/{status_page}")

            if hb_response.status_code == 200 and info_response.status_code == 200:
                heartbeats = hb_response.json().get("heartbeatList", {})
                info = info_response.json()

                for group in info.get("publicGroupList", []):
                    for monitor in group.get("monitorList", []):
                        monitor_id = str(monitor.get("id"))
                        monitor_heartbeats = heartbeats.get(monitor_id, [])
                        latest_status = 0
                        latest_ping = None
                        if monitor_heartbeats:
                            latest = monitor_heartbeats[-1]
                            latest_status = latest.get("status", 0)
                            latest_ping = latest.get("ping")
                        recent_hb = monitor_heartbeats[-20:] if monitor_heartbeats else []
                        result["monitors"].append(UptimeMonitor(
                            id=monitor.get("id"),
                            name=monitor.get("name"),
                            status=latest_status,
                            ping=latest_ping,
                            heartbeats=[{"status": h.get("status", 0), "ping": h.get("ping")} for h in recent_hb],
                        ))
                        if latest_status == 1:
                            result["summary"]["up"] += 1
                        else:
                            result["summary"]["down"] += 1
                        result["summary"]["total"] += 1
    except Exception:
        pass
    return result
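
# Heartbeat status codes follow Uptime Kuma's convention: 0 = down, 1 = up,
# 2 = pending, 3 = maintenance. Anything but 1 is counted as "down" above.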

async def get_prometheus_metrics(url: str, queries: Dict[str, str]) -> Dict[str, Any]:
    """Query Prometheus for metrics."""
    results = {}
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            for name, query in queries.items():
                response = await client.get(f"{url}/api/v1/query", params={"query": query})
                if response.status_code == 200:
                    data = response.json().get("data", {}).get("result", [])
                    if data:
                        # Instant-query values come back as [timestamp, "value"]
                        results[name] = float(data[0].get("value", [0, 0])[1])
    except Exception:
        pass
    return results
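
# Example `queries` argument (PromQL, illustrative; assumes node_exporter):
#     {"cpu_pct": '100 - avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100'}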

async def get_camera_list(go2rtc_url: str) -> List[str]:
    """Get camera list from go2rtc."""
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{go2rtc_url}/api/streams")
            if response.status_code == 200:
                return list(response.json().keys())
    except Exception:
        pass
    return []
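
# go2rtc's /api/streams returns a JSON object keyed by stream name, so the
# keys alone are enough to build the camera grid in the UI.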

async def get_sabnzbd_queue(url: str, api_key: str = "") -> Dict:
    """Get SABnzbd download queue."""
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            params = {"mode": "queue", "output": "json"}
            if api_key:
                params["apikey"] = api_key
            response = await client.get(f"{url}/api", params=params)
            if response.status_code == 200:
                data = response.json().get("queue", {})
                return {
                    "speed": data.get("speed", "0 B/s"),
                    "size_left": data.get("sizeleft", "0 B"),
                    "eta": data.get("timeleft", "Unknown"),
                    "downloading": len(data.get("slots", [])),
                    "items": [
                        {"name": s.get("filename", "Unknown")[:40], "progress": float(s.get("percentage", 0))}
                        for s in data.get("slots", [])[:3]
                    ],
                }
    except Exception:
        pass
    return {"speed": "N/A", "downloading": 0, "items": []}

def get_recent_events() -> List[StatusEvent]:
    """Get recent status change events."""
    return list(recent_events)

def get_cluster_uptime(nodes: List[NodeStatus]) -> float:
    """Calculate total cluster uptime in hours (summed across nodes)."""
    total = 0
    for node in nodes:
        if node.uptime_hours:
            total += node.uptime_hours
    return round(total, 1)
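
if __name__ == "__main__":
    # Ad-hoc smoke test (illustrative sketch; SimpleNamespace stands in for
    # the YAML-configured service objects the dashboard normally supplies).
    from types import SimpleNamespace

    async def _demo() -> None:
        svcs = [SimpleNamespace(name="Immich", ip="192.168.1.54", port=2283)]
        for name, hs in (await check_all_services(svcs)).items():
            print(name, hs.status, hs.response_time_ms, hs.error)

    asyncio.run(_demo())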