From 2c234b2fd06fac0d80941219edf064ae520de785 Mon Sep 17 00:00:00 2001 From: Alex Tau Date: Thu, 5 Jun 2025 22:52:57 +0300 Subject: [PATCH] persistent alerts --- src/lego_monitoring/__init__.py | 4 +-- src/lego_monitoring/alerting/channel.py | 9 ++++--- src/lego_monitoring/alerting/current.py | 2 +- src/lego_monitoring/core/checkers.py | 35 ++++++++++++++++--------- src/lego_monitoring/core/cvars.py | 2 -- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/lego_monitoring/__init__.py b/src/lego_monitoring/__init__.py index 1971316..eabd599 100644 --- a/src/lego_monitoring/__init__.py +++ b/src/lego_monitoring/__init__.py @@ -56,7 +56,7 @@ async def async_main(): check_sets.START: [channel.send_start_alert()], check_sets.STOP: [], # this is checked later check_sets.CPU: [ - IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=5), persistent=True).run_checker() + IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True).run_checker() ], check_sets.RAM: [ IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True).run_checker() @@ -66,7 +66,7 @@ async def async_main(): ], check_sets.VULNIX: [ IntervalChecker( - checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_same_state=True + checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_any_state=True ).run_checker() ], } diff --git a/src/lego_monitoring/alerting/channel.py b/src/lego_monitoring/alerting/channel.py index 09a72d7..3124b6b 100644 --- a/src/lego_monitoring/alerting/channel.py +++ b/src/lego_monitoring/alerting/channel.py @@ -16,7 +16,7 @@ async def get_client() -> TelegramClient: return client -def format_message(alert: Alert) -> str: +def format_message(alert: Alert, persistent: bool) -> str: match alert.severity: case Severity.OK: severity_emoji = "🟢" @@ -26,11 +26,12 @@ def format_message(alert: Alert) -> str: severity_emoji = "⚠️" case Severity.CRITICAL: severity_emoji = "🆘" - message = f"{severity_emoji} {alert.alert_type} Alert\n{alert.message}" + persistent_marker = " - ongoing" if persistent else "" + message = f"{severity_emoji} {alert.alert_type} Alert{persistent_marker}\n{alert.message}" return message -async def send_alert(alert: Alert) -> None: +async def send_alert(alert: Alert, persistent: bool = False) -> None: try: client = cvars.tg_client.get() except LookupError: # being called standalone @@ -42,7 +43,7 @@ async def send_alert(alert: Alert) -> None: else: ... # temp_client = False room_id = cvars.config.get().telegram.room_id - message = format_message(alert) + message = format_message(alert, persistent) await client.send_message(entity=room_id, message=message) # if temp_client: # await client.close() diff --git a/src/lego_monitoring/alerting/current.py b/src/lego_monitoring/alerting/current.py index dae575e..bdd65e6 100644 --- a/src/lego_monitoring/alerting/current.py +++ b/src/lego_monitoring/alerting/current.py @@ -10,7 +10,7 @@ class CurrentAlerts(list[Alert]): for a in self: if max_severity is None or a.severity > max_severity: max_severity = a.severity - return a.severity + return max_severity def get_types(self) -> set[AlertType]: types = set() diff --git a/src/lego_monitoring/core/checkers.py b/src/lego_monitoring/core/checkers.py index 5c2c97d..9f976a3 100644 --- a/src/lego_monitoring/core/checkers.py +++ b/src/lego_monitoring/core/checkers.py @@ -6,15 +6,19 @@ from typing import Any, Callable, Coroutine from ..alerting.alert import Alert from ..alerting.channel import send_alert +from ..alerting.current import CurrentAlerts +from ..alerting.enum import Severity +from . import cvars @dataclass class BaseChecker: check: Callable | Coroutine persistent: bool - send_same_state: bool = False + send_any_state: bool = False check_args: list = field(default_factory=list) check_kwargs: dict[str, Any] = field(default_factory=dict) + current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False) async def _call_check(self) -> list[Alert]: if isinstance(self.check, Callable): @@ -27,13 +31,22 @@ class BaseChecker: raise TypeError(f"check is {type(self.check)}, neither function nor coroutine") return result - async def _handle_alert(alert: Alert, persistent: bool, send_same_state: bool) -> None: - if not persistent: - await send_alert(alert) + async def _handle_alerts(self, alerts: list[Alert]) -> None: + if not self.persistent: + for alert in alerts: + await send_alert(alert) return - ... + old_types = self.current_alerts.get_types() + old_severity, new_severity = self.current_alerts.update(alerts) + new_types = self.current_alerts.get_types() + if old_severity != new_severity or self.send_any_state: + for alert in alerts: + await send_alert(alert, persistent=True) + for alert_type in old_types - new_types: + alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK) + await send_alert(alert) - async def run_checker(self): + async def run_checker(self) -> None: raise NotImplementedError @@ -42,14 +55,13 @@ class IntervalChecker(BaseChecker): _: KW_ONLY interval: datetime.timedelta - async def run_checker(self): + async def run_checker(self) -> None: interval_secs = self.interval.total_seconds() while True: logging.info(f"Calling {self.check.__name__}") result = await self._call_check() logging.info(f"Got {len(result)} alerts") - for alert in result: - await send_alert(alert) + await self._handle_alerts(result) await asyncio.sleep(interval_secs) @@ -59,7 +71,7 @@ class ScheduledChecker(BaseChecker): period: datetime.timedelta when: datetime.time - async def run_checker(self): + async def run_checker(self) -> None: match self.period: case datetime.timedelta(days=1): while True: @@ -75,7 +87,6 @@ class ScheduledChecker(BaseChecker): logging.info(f"Calling {self.check.__name__}") result = await self._call_check() logging.info(f"Got {len(result)} alerts") - for alert in result: - await send_alert(alert) + await self._handle_alerts(result) case _: raise NotImplementedError diff --git a/src/lego_monitoring/core/cvars.py b/src/lego_monitoring/core/cvars.py index 78dff36..a4781c5 100644 --- a/src/lego_monitoring/core/cvars.py +++ b/src/lego_monitoring/core/cvars.py @@ -2,9 +2,7 @@ from contextvars import ContextVar from telethon import TelegramClient -from ..alerting.current import CurrentAlerts from ..config import Config config: ContextVar[Config] = ContextVar("config") tg_client: ContextVar[TelegramClient] = ContextVar("tg_client") -current_alerts: ContextVar[list[CurrentAlerts]] = ContextVar("current_alerts", default=[])