"""告警检测服务 - 根据告警规则检查最新数据,生成/自动恢复告警事件""" import asyncio import logging from datetime import datetime, timezone, timedelta from pathlib import Path from sqlalchemy import select, and_ from sqlalchemy.ext.asyncio import AsyncSession from app.models.alarm import AlarmRule, AlarmEvent from app.models.energy import EnergyData from app.models.device import Device from app.hooks import get_hooks logger = logging.getLogger("alarm_checker") # Alarm email template path _ALARM_TEMPLATE_PATH = Path(__file__).resolve().parent.parent / "templates" / "alarm_email.html" # Severity display config _SEVERITY_CONFIG = { "critical": { "label": "紧急告警", "badge_color": "#d32f2f", "bg_color": "#ffebee", "text_color": "#c62828", }, "major": { "label": "重要告警", "badge_color": "#e65100", "bg_color": "#fff3e0", "text_color": "#e65100", }, "warning": { "label": "一般告警", "badge_color": "#f9a825", "bg_color": "#fffde7", "text_color": "#f57f17", }, } async def _send_alarm_email( rule: AlarmRule, event: AlarmEvent, device_id: int, session: AsyncSession ): """Send alarm notification email if configured.""" from app.services.email_service import send_email from app.core.config import get_settings # Check if email is in notify_channels channels = rule.notify_channels or [] if "email" not in channels: return # Get email targets from notify_targets targets = rule.notify_targets or {} emails = targets.get("emails", []) if isinstance(targets, dict) else [] # If notify_targets is a list of strings (emails directly) if isinstance(targets, list): emails = [t for t in targets if isinstance(t, str) and "@" in t] if not emails: logger.debug(f"No email recipients for alarm rule '{rule.name}', skipping.") return # Fetch device info dev_result = await session.execute(select(Device).where(Device.id == device_id)) device = dev_result.scalar_one_or_none() device_name = device.name if device else f"设备#{device_id}" device_code = device.code if device else "N/A" settings = get_settings() severity_cfg = _SEVERITY_CONFIG.get(rule.severity, _SEVERITY_CONFIG["warning"]) # Build threshold string if rule.condition == "range_out": threshold_str = f"[{rule.threshold_low}, {rule.threshold_high}]" else: threshold_str = str(rule.threshold) # Format triggered time in Beijing timezone triggered_time = event.triggered_at or datetime.now(timezone.utc) triggered_beijing = triggered_time + timedelta(hours=8) triggered_str = triggered_beijing.strftime("%Y-%m-%d %H:%M:%S") # Load and render template try: template_html = _ALARM_TEMPLATE_PATH.read_text(encoding="utf-8") except FileNotFoundError: logger.error("Alarm email template not found, skipping email.") return body_html = template_html.format( severity_label=severity_cfg["label"], severity_badge_color=severity_cfg["badge_color"], severity_bg_color=severity_cfg["bg_color"], severity_text_color=severity_cfg["text_color"], title=event.title, device_name=device_name, device_code=device_code, data_type=rule.data_type, current_value=str(event.value), threshold_str=threshold_str, triggered_at=triggered_str, description=event.description or "", platform_url=settings.PLATFORM_URL, ) subject = f"[{severity_cfg['label']}] {event.title} - 天普EMS告警通知" asyncio.create_task(send_email(to=emails, subject=subject, body_html=body_html)) # Rate limit: don't create duplicate events for the same rule+device within this window RATE_LIMIT_MINUTES = 5 def _in_silence_window(rule: AlarmRule, now_beijing: datetime) -> bool: """Check if current time falls within the rule's silence window.""" if not rule.silence_start or not rule.silence_end: return False current_time = now_beijing.strftime("%H:%M") start = rule.silence_start end = rule.silence_end if start <= end: return start <= current_time <= end else: # Crosses midnight, e.g. 22:00 - 06:00 return current_time >= start or current_time <= end def _evaluate_condition(rule: AlarmRule, value: float) -> bool: """Evaluate whether a data value triggers the alarm rule condition.""" if rule.condition == "gt": return value > rule.threshold elif rule.condition == "lt": return value < rule.threshold elif rule.condition == "eq": return abs(value - rule.threshold) < 0.001 elif rule.condition == "neq": return abs(value - rule.threshold) >= 0.001 elif rule.condition == "range_out": low = rule.threshold_low if rule.threshold_low is not None else float("-inf") high = rule.threshold_high if rule.threshold_high is not None else float("inf") return value < low or value > high return False async def check_alarms(session: AsyncSession): """Main alarm check routine. Call after each simulator data cycle.""" now = datetime.now(timezone.utc) now_beijing = now + timedelta(hours=8) # 1. Load all active alarm rules result = await session.execute( select(AlarmRule).where(AlarmRule.is_active == True) ) rules = result.scalars().all() for rule in rules: # Skip if in silence window if _in_silence_window(rule, now_beijing): continue # 2. Find matching devices' latest data point # Rules can match by device_id (specific) or device_type (all devices of that type) data_query = ( select(EnergyData) .where(EnergyData.data_type == rule.data_type) .order_by(EnergyData.timestamp.desc()) ) if rule.device_id: data_query = data_query.where(EnergyData.device_id == rule.device_id) # We need to check per-device, so get recent data points # For device_type rules, we get data from the last 30 seconds (one cycle) cutoff = now - timedelta(seconds=30) data_query = data_query.where(EnergyData.timestamp >= cutoff).limit(50) data_result = await session.execute(data_query) data_points = data_result.scalars().all() if not data_points: continue # Group by device_id and take the latest per device latest_by_device: dict[int, EnergyData] = {} for dp in data_points: if dp.device_id not in latest_by_device: latest_by_device[dp.device_id] = dp for device_id, dp in latest_by_device.items(): triggered = _evaluate_condition(rule, dp.value) # Check for existing active event for this rule + device active_event_result = await session.execute( select(AlarmEvent).where( and_( AlarmEvent.rule_id == rule.id, AlarmEvent.device_id == device_id, AlarmEvent.status.in_(["active", "acknowledged"]), ) ) ) active_event = active_event_result.scalar_one_or_none() if triggered and not active_event: # Rate limiting: check if a resolved event was created recently recent_result = await session.execute( select(AlarmEvent).where( and_( AlarmEvent.rule_id == rule.id, AlarmEvent.device_id == device_id, AlarmEvent.triggered_at >= now - timedelta(minutes=RATE_LIMIT_MINUTES), ) ) ) if recent_result.scalar_one_or_none(): continue # Skip, recently triggered # Build description threshold_str = "" if rule.condition == "range_out": threshold_str = f"[{rule.threshold_low}, {rule.threshold_high}]" else: threshold_str = str(rule.threshold) event = AlarmEvent( rule_id=rule.id, device_id=device_id, severity=rule.severity, title=rule.name, description=f"当前值 {dp.value},阈值 {threshold_str}", value=dp.value, threshold=rule.threshold, status="active", triggered_at=now, ) session.add(event) await session.flush() # Ensure event has id # Customer hook: on_alarm_created try: _dev = await session.execute(select(Device).where(Device.id == device_id)) _device = _dev.scalar_one_or_none() await get_hooks().on_alarm_created(event, _device, rule, session) except Exception as _he: logger.error(f"Hook on_alarm_created error: {_he}") logger.info( f"Alarm triggered: {rule.name} | device={device_id} | " f"value={dp.value} threshold={threshold_str}" ) # Send email notification (non-blocking) await _send_alarm_email(rule, event, device_id, session) elif not triggered and active_event: # Auto-resolve active_event.status = "resolved" active_event.resolved_at = now active_event.resolve_note = "自动恢复" # Customer hook: on_alarm_resolved try: _dev2 = await session.execute(select(Device).where(Device.id == device_id)) _device2 = _dev2.scalar_one_or_none() await get_hooks().on_alarm_resolved(active_event, _device2, session) except Exception as _he2: logger.error(f"Hook on_alarm_resolved error: {_he2}") logger.info( f"Alarm auto-resolved: {rule.name} | device={device_id}" ) await session.flush()