ems-core v1.0.0: Standard EMS platform core
Shared backend + frontend for multi-customer EMS deployments.

- 12 enterprise modules: quota, cost, charging, maintenance, analysis, etc.
- 120+ API endpoints, 37 database tables
- Customer config mechanism (CUSTOMER env var + YAML config)
- Collectors: Modbus TCP, MQTT, HTTP API, Sungrow iSolarCloud
- Frontend: React 19 + Ant Design + ECharts + Three.js
- Infrastructure: Redis cache, rate limiting, aggregation engine

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
253
backend/app/services/alarm_checker.py
Normal file
253
backend/app/services/alarm_checker.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""Alarm checking service.

Checks the latest data against the alarm rules and creates or
automatically resolves alarm events.
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
from sqlalchemy import select, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.models.alarm import AlarmRule, AlarmEvent
|
||||
from app.models.energy import EnergyData
|
||||
from app.models.device import Device
|
||||
|
||||
# Module-level logger, registered under the fixed name "alarm_checker".
logger = logging.getLogger("alarm_checker")

# Location of the alarm e-mail HTML template: the "templates" directory that
# sits beside this file's parent directory.
_ALARM_TEMPLATE_PATH = Path(__file__).resolve().parents[1].joinpath(
    "templates", "alarm_email.html"
)
|
||||
|
||||
# Presentation settings per alarm severity, used when rendering the alarm
# e-mail: a display label plus badge, background and text colours.
_SEVERITY_CONFIG = {
    "critical": dict(
        label="紧急告警",
        badge_color="#d32f2f",
        bg_color="#ffebee",
        text_color="#c62828",
    ),
    "major": dict(
        label="重要告警",
        badge_color="#e65100",
        bg_color="#fff3e0",
        text_color="#e65100",
    ),
    "warning": dict(
        label="一般告警",
        badge_color="#f9a825",
        bg_color="#fffde7",
        text_color="#f57f17",
    ),
}
|
||||
|
||||
|
||||
# Strong references to in-flight alarm e-mail tasks. The event loop only keeps
# weak references to tasks, so a fire-and-forget task with no other owner may
# be garbage-collected before it finishes.
_PENDING_EMAIL_TASKS: set = set()


def _on_alarm_email_done(task) -> None:
    """Drop a finished e-mail task from the pending set and log its failure, if any."""
    _PENDING_EMAIL_TASKS.discard(task)
    if task.cancelled():
        return
    # Retrieving the exception also prevents asyncio's
    # "Task exception was never retrieved" report.
    error = task.exception()
    if error is not None:
        logger.error("Alarm email delivery failed.", exc_info=error)


async def _send_alarm_email(
    rule: AlarmRule, event: AlarmEvent, device_id: int, session: AsyncSession
):
    """Send an alarm notification e-mail if the rule is configured for it.

    Nothing is sent when "email" is not among ``rule.notify_channels``, when no
    recipients can be derived from ``rule.notify_targets``, or when the HTML
    template is missing or cannot be rendered.

    Args:
        rule: The alarm rule that fired; supplies channels, recipients,
            severity, condition and thresholds.
        event: The alarm event being reported; supplies title, value,
            description and trigger time.
        device_id: Id of the device the event belongs to; used to look up the
            device name and code.
        session: Database session used for the device lookup.

    The e-mail itself is sent from a background task, so this coroutine
    returns without waiting for delivery.
    """
    from html import escape

    from app.services.email_service import send_email
    from app.core.config import get_settings

    # Check if email is in notify_channels
    channels = rule.notify_channels or []
    if "email" not in channels:
        return

    # notify_targets is either a dict with an "emails" key or a plain list of
    # address strings; anything else yields no recipients.
    targets = rule.notify_targets or {}
    if isinstance(targets, dict):
        emails = targets.get("emails", [])
    elif isinstance(targets, list):
        emails = [t for t in targets if isinstance(t, str) and "@" in t]
    else:
        emails = []

    if not emails:
        logger.debug(f"No email recipients for alarm rule '{rule.name}', skipping.")
        return

    # Fetch device info
    dev_result = await session.execute(select(Device).where(Device.id == device_id))
    device = dev_result.scalar_one_or_none()
    device_name = device.name if device else f"设备#{device_id}"
    device_code = device.code if device else "N/A"

    settings = get_settings()
    severity_cfg = _SEVERITY_CONFIG.get(rule.severity, _SEVERITY_CONFIG["warning"])

    # Build threshold string
    if rule.condition == "range_out":
        threshold_str = f"[{rule.threshold_low}, {rule.threshold_high}]"
    else:
        threshold_str = str(rule.threshold)

    # Format the trigger time as UTC+8 wall-clock time. A naive timestamp is
    # taken to be UTC; an aware one is converted from whatever zone it carries.
    triggered_time = event.triggered_at or datetime.now(timezone.utc)
    if triggered_time.tzinfo is None:
        triggered_time = triggered_time.replace(tzinfo=timezone.utc)
    triggered_beijing = triggered_time.astimezone(timezone(timedelta(hours=8)))
    triggered_str = triggered_beijing.strftime("%Y-%m-%d %H:%M:%S")

    # Load and render template
    try:
        template_html = _ALARM_TEMPLATE_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        logger.error("Alarm email template not found, skipping email.")
        return

    def _html(value) -> str:
        # Rule / device / event text is free-form; escape it so it cannot
        # break or inject markup in the rendered HTML body.
        return escape(str(value))

    try:
        body_html = template_html.format(
            severity_label=severity_cfg["label"],
            severity_badge_color=severity_cfg["badge_color"],
            severity_bg_color=severity_cfg["bg_color"],
            severity_text_color=severity_cfg["text_color"],
            title=_html(event.title),
            device_name=_html(device_name),
            device_code=_html(device_code),
            data_type=_html(rule.data_type),
            current_value=_html(event.value),
            threshold_str=_html(threshold_str),
            triggered_at=triggered_str,
            description=_html(event.description or ""),
            platform_url=_html(settings.PLATFORM_URL),
        )
    except (KeyError, IndexError, ValueError) as exc:
        # str.format raises these for unknown placeholders or stray braces in
        # the template; treat it like a missing template and skip the e-mail.
        logger.error(f"Alarm email template could not be rendered ({exc!r}), skipping email.")
        return

    subject = f"[{severity_cfg['label']}] {event.title} - 天普EMS告警通知"

    # Fire-and-forget delivery: keep a reference until the task completes and
    # surface any failure through the done callback.
    task = asyncio.create_task(send_email(to=emails, subject=subject, body_html=body_html))
    _PENDING_EMAIL_TASKS.add(task)
    task.add_done_callback(_on_alarm_email_done)
|
||||
|
||||
# Rate limit: don't create duplicate events for the same rule+device within this window.
# The value is a number of minutes, counted back from the current check time.
RATE_LIMIT_MINUTES = 5
|
||||
|
||||
|
||||
def _minutes_of_day(value):
    """Convert a clock value to minutes since midnight.

    Accepts objects exposing ``hour`` and ``minute`` attributes, or strings of
    the form "H:MM", "HH:MM" or "HH:MM:SS" (seconds are ignored). Returns
    ``None`` when the value cannot be interpreted.
    """
    hour = getattr(value, "hour", None)
    minute = getattr(value, "minute", None)
    if hour is not None and minute is not None:
        return hour * 60 + minute
    try:
        hour_text, minute_text = str(value).strip().split(":")[:2]
        return int(hour_text) * 60 + int(minute_text)
    except ValueError:
        # Raised both for a missing ":" part and for non-numeric fields.
        return None


def _in_silence_window(rule: AlarmRule, now_beijing: datetime) -> bool:
    """Check if the current time falls within the rule's silence window.

    Args:
        rule: Alarm rule; ``silence_start`` and ``silence_end`` bound the
            window. If either is empty the rule has no silence window.
        now_beijing: Current wall-clock time to test; only its hour and minute
            are used.

    Returns:
        True when the time is inside the window (both ends inclusive, at
        minute granularity). A window whose start is later than its end is
        treated as crossing midnight, e.g. 22:00 - 06:00. An uninterpretable
        bound yields False, so alarms are not silenced by bad configuration.
    """
    if not rule.silence_start or not rule.silence_end:
        return False

    # Compare numerically rather than as text, so that values such as "8:00"
    # (not zero-padded) order correctly against "17:30".
    start = _minutes_of_day(rule.silence_start)
    end = _minutes_of_day(rule.silence_end)
    if start is None or end is None:
        logger.warning(
            f"Invalid silence window {rule.silence_start!r} - {rule.silence_end!r}, ignoring."
        )
        return False

    current = now_beijing.hour * 60 + now_beijing.minute
    if start <= end:
        return start <= current <= end
    # Crosses midnight, e.g. 22:00 - 06:00
    return current >= start or current <= end
|
||||
|
||||
|
||||
# Absolute tolerance used by the "eq" / "neq" conditions.
_EQ_TOLERANCE = 0.001


def _evaluate_condition(rule: AlarmRule, value: float) -> bool:
    """Evaluate whether a data value triggers the alarm rule condition.

    Args:
        rule: Alarm rule; ``condition`` selects the comparison and
            ``threshold`` / ``threshold_low`` / ``threshold_high`` supply the
            limits.
        value: The data value to test.

    Returns:
        True when the value violates the rule:
          * "gt" / "lt": value is strictly above / below ``threshold``.
          * "eq" / "neq": value is within / outside 0.001 of ``threshold``.
          * "range_out": value is below ``threshold_low`` or above
            ``threshold_high``; a missing bound is treated as unbounded.
        False for an unknown condition, and for a threshold-based condition
        whose ``threshold`` is not set (the comparison cannot be made).
    """
    condition = rule.condition

    if condition == "range_out":
        low = rule.threshold_low if rule.threshold_low is not None else float("-inf")
        high = rule.threshold_high if rule.threshold_high is not None else float("inf")
        return value < low or value > high

    threshold = rule.threshold
    if threshold is None:
        # Mirrors range_out's tolerance of missing bounds instead of raising
        # TypeError on a comparison with None.
        return False

    if condition == "gt":
        return value > threshold
    if condition == "lt":
        return value < threshold
    if condition == "eq":
        return abs(value - threshold) < _EQ_TOLERANCE
    if condition == "neq":
        return abs(value - threshold) >= _EQ_TOLERANCE
    return False
|
||||
|
||||
|
||||
async def check_alarms(session: AsyncSession):
    """Main alarm check routine. Call after each simulator data cycle.

    For every active alarm rule that is not inside its silence window, the
    newest data point per device from the last 30 seconds is evaluated against
    the rule:

    * condition met and no open ("active" / "acknowledged") event exists for
      the rule + device: a new "active" event is added and an e-mail
      notification is attempted, unless an event for the same rule + device
      was already triggered within the last ``RATE_LIMIT_MINUTES`` minutes;
    * condition not met while open events exist: those events are marked
      "resolved".

    Args:
        session: Database session used for all queries. Changes are flushed at
            the end but not committed here.
    """
    now = datetime.now(timezone.utc)
    # Wall-clock time in UTC+8, used only for the silence-window check.
    now_beijing = now.astimezone(timezone(timedelta(hours=8)))

    # Both cutoffs are the same for every rule, so compute them once.
    data_cutoff = now - timedelta(seconds=30)
    rate_limit_cutoff = now - timedelta(minutes=RATE_LIMIT_MINUTES)

    # 1. Load all active alarm rules
    result = await session.execute(
        select(AlarmRule).where(AlarmRule.is_active == True)
    )
    rules = result.scalars().all()

    for rule in rules:
        # Skip if in silence window
        if _in_silence_window(rule, now_beijing):
            continue

        # 2. Find matching devices' latest data point.
        # A rule with device_id targets that device only; otherwise every
        # device reporting this data_type is considered.
        # NOTE(review): the original comment mentions matching by device_type,
        # but no device_type filter is applied here - confirm the intent.
        criteria = [
            EnergyData.data_type == rule.data_type,
            # Only data from the last 30 seconds (one cycle).
            EnergyData.timestamp >= data_cutoff,
        ]
        if rule.device_id:
            criteria.append(EnergyData.device_id == rule.device_id)

        # NOTE(review): limit(50) caps rows, not devices; with many devices or
        # a high sample rate some devices may be missed - confirm sizing.
        data_query = (
            select(EnergyData)
            .where(and_(*criteria))
            .order_by(EnergyData.timestamp.desc())
            .limit(50)
        )
        data_result = await session.execute(data_query)
        data_points = data_result.scalars().all()

        if not data_points:
            continue

        # Rows are ordered newest-first, so the first row seen for a device is
        # its latest data point.
        latest_by_device: dict[int, EnergyData] = {}
        for dp in data_points:
            latest_by_device.setdefault(dp.device_id, dp)

        for device_id, dp in latest_by_device.items():
            triggered = _evaluate_condition(rule, dp.value)

            # Open events for this rule + device. All rows are fetched rather
            # than using scalar_one_or_none(), which raises when more than one
            # row matches and would abort the whole check cycle.
            active_result = await session.execute(
                select(AlarmEvent).where(
                    and_(
                        AlarmEvent.rule_id == rule.id,
                        AlarmEvent.device_id == device_id,
                        AlarmEvent.status.in_(["active", "acknowledged"]),
                    )
                )
            )
            active_events = active_result.scalars().all()

            if triggered and not active_events:
                # Rate limiting: skip if any event for this rule + device was
                # triggered recently. One row is enough to decide.
                recent_result = await session.execute(
                    select(AlarmEvent)
                    .where(
                        and_(
                            AlarmEvent.rule_id == rule.id,
                            AlarmEvent.device_id == device_id,
                            AlarmEvent.triggered_at >= rate_limit_cutoff,
                        )
                    )
                    .limit(1)
                )
                if recent_result.scalars().first() is not None:
                    continue  # Skip, recently triggered

                # Build description
                if rule.condition == "range_out":
                    threshold_str = f"[{rule.threshold_low}, {rule.threshold_high}]"
                else:
                    threshold_str = str(rule.threshold)

                event = AlarmEvent(
                    rule_id=rule.id,
                    device_id=device_id,
                    severity=rule.severity,
                    title=rule.name,
                    description=f"当前值 {dp.value},阈值 {threshold_str}",
                    value=dp.value,
                    threshold=rule.threshold,
                    status="active",
                    triggered_at=now,
                )
                session.add(event)
                logger.info(
                    f"Alarm triggered: {rule.name} | device={device_id} | "
                    f"value={dp.value} threshold={threshold_str}"
                )

                # Send email notification (non-blocking). Notification is
                # best-effort: a failure here must not stop alarm detection
                # for the remaining devices and rules.
                try:
                    await _send_alarm_email(rule, event, device_id, session)
                except Exception:
                    logger.exception(
                        f"Failed to send alarm email: {rule.name} | device={device_id}"
                    )

            elif not triggered and active_events:
                # Auto-resolve every open event for this rule + device.
                for active_event in active_events:
                    active_event.status = "resolved"
                    active_event.resolved_at = now
                    active_event.resolve_note = "自动恢复"
                logger.info(
                    f"Alarm auto-resolved: {rule.name} | device={device_id}"
                )

    await session.flush()
|
||||
Reference in New Issue
Block a user