remind about persistent alerts

This commit is contained in:
Alex Tau 2025-06-06 00:46:44 +03:00
parent 2c234b2fd0
commit f691180e9b
9 changed files with 115 additions and 30 deletions

View file

@ -27,6 +27,7 @@ in
type = lib.types.listOf (lib.types.enum [ type = lib.types.listOf (lib.types.enum [
"start" "start"
"stop" "stop"
"remind"
"cpu" "cpu"
"ram" "ram"

View file

@ -3,6 +3,7 @@ import asyncio
import datetime import datetime
import logging import logging
import signal import signal
from typing import Coroutine
from . import checks from . import checks
from .alerting import channel from .alerting import channel
@ -10,7 +11,7 @@ from .checks.temp.sensors import print_readings
from .config import enums as config_enums from .config import enums as config_enums
from .config import load_config from .config import load_config
from .core import cvars from .core import cvars
from .core.checkers import IntervalChecker from .core.checkers import BaseChecker, IntervalChecker, ScheduledChecker
stopping = False stopping = False
@ -52,25 +53,35 @@ async def async_main():
check_sets = config_enums.CheckSet check_sets = config_enums.CheckSet
checker_sets = { checker_sets: dict[config_enums.CheckSet, list[Coroutine | BaseChecker]] = {
check_sets.START: [channel.send_start_alert()], check_sets.START: [channel.send_start_alert()],
check_sets.STOP: [], # this is checked later check_sets.STOP: [], # this is checked later
check_sets.CPU: [ check_sets.CPU: [IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True)],
IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True).run_checker() check_sets.RAM: [IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True)],
], check_sets.TEMP: [IntervalChecker(checks.temp_check, interval=datetime.timedelta(minutes=5), persistent=True)],
check_sets.RAM: [
IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True).run_checker()
],
check_sets.TEMP: [
IntervalChecker(checks.temp_check, interval=datetime.timedelta(minutes=5), persistent=True).run_checker()
],
check_sets.VULNIX: [ check_sets.VULNIX: [
IntervalChecker( IntervalChecker(
checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_any_state=True checks.vulnix_check,
).run_checker() interval=datetime.timedelta(days=3),
persistent=True,
send_any_state=True,
# As those are checked less often than daily, reminds could lead to awkward situations
# when the vuln is fixed but you still get reminders about it for 2 more days.
remind=False,
)
],
check_sets.REMIND: [
ScheduledChecker(
checks.remind_check,
period=datetime.timedelta(days=1),
when=datetime.time(hour=0, minute=0),
persistent=False,
)
], ],
} }
checker_sets[check_sets.REMIND][0].check_args = [checker_sets]
checkers = [] checkers = []
for enabled_set in config.enabled_check_sets: for enabled_set in config.enabled_check_sets:
for checker in checker_sets[enabled_set]: for checker in checker_sets[enabled_set]:
@ -81,6 +92,8 @@ async def async_main():
async with asyncio.TaskGroup() as tg: async with asyncio.TaskGroup() as tg:
checker_tasks: set[asyncio.Task] = set() checker_tasks: set[asyncio.Task] = set()
for c in checkers: for c in checkers:
if isinstance(c, BaseChecker):
c = c.run_checker()
task = tg.create_task(c) task = tg.create_task(c)
checker_tasks.add(task) checker_tasks.add(task)
while True: while True:

View file

@ -5,7 +5,7 @@ from telethon.sessions import MemorySession
from ..core import cvars from ..core import cvars
from .alert import Alert from .alert import Alert
from .enum import AlertType, Severity from .enum import SEVERITY_TO_EMOJI, AlertType, Severity
async def get_client() -> TelegramClient: async def get_client() -> TelegramClient:
@ -16,22 +16,14 @@ async def get_client() -> TelegramClient:
return client return client
def format_message(alert: Alert, persistent: bool) -> str: def format_message(alert: Alert, note: str) -> str:
match alert.severity: severity_emoji = SEVERITY_TO_EMOJI[alert.severity]
case Severity.OK: note_formatted = f" - <i>{note}</i>" if note else ""
severity_emoji = "🟢" message = f"{severity_emoji} {alert.alert_type} Alert{note_formatted}\n{alert.message}"
case Severity.INFO:
severity_emoji = ""
case Severity.WARNING:
severity_emoji = "⚠️"
case Severity.CRITICAL:
severity_emoji = "🆘"
persistent_marker = " - <i>ongoing</i>" if persistent else ""
message = f"{severity_emoji} {alert.alert_type} Alert{persistent_marker}\n{alert.message}"
return message return message
async def send_alert(alert: Alert, persistent: bool = False) -> None: async def send_alert(alert: Alert, note: str = "") -> None:
try: try:
client = cvars.tg_client.get() client = cvars.tg_client.get()
except LookupError: # being called standalone except LookupError: # being called standalone
@ -43,7 +35,7 @@ async def send_alert(alert: Alert, persistent: bool = False) -> None:
else: else:
... # temp_client = False ... # temp_client = False
room_id = cvars.config.get().telegram.room_id room_id = cvars.config.get().telegram.room_id
message = format_message(alert, persistent) message = format_message(alert, note)
await client.send_message(entity=room_id, message=message) await client.send_message(entity=room_id, message=message)
# if temp_client: # if temp_client:
# await client.close() # await client.close()

View file

@ -9,6 +9,7 @@ class AlertType(StrEnum):
TEMP = "TEMP" TEMP = "TEMP"
TEST = "TEST" TEST = "TEST"
VULN = "VULN" VULN = "VULN"
REMIND = "REMIND"
# LOGIN = "LOGIN" # LOGIN = "LOGIN"
# SMART = "SMART" # TODO # SMART = "SMART" # TODO
# RAID = "RAID" # RAID = "RAID"
@ -22,3 +23,11 @@ class Severity(IntEnum):
INFO = 1 INFO = 1
WARNING = 2 WARNING = 2
CRITICAL = 3 CRITICAL = 3
# Lookup table from a Severity level to the emoji shown in front of an alert message.
# NOTE(review): INFO maps to an empty string here - confirm this is intentional
# and not a glyph that was lost somewhere along the way.
SEVERITY_TO_EMOJI = {
Severity.OK: "🟢",
Severity.INFO: "",
Severity.WARNING: "⚠️",
Severity.CRITICAL: "🆘",
}

View file

@ -1,4 +1,5 @@
from .cpu import cpu_check from .cpu import cpu_check
from .ram import ram_check from .ram import ram_check
from .remind import remind_check
from .temp import temp_check from .temp import temp_check
from .vulnix import vulnix_check from .vulnix import vulnix_check

View file

@ -0,0 +1,43 @@
from typing import Any, Coroutine
from lego_monitoring.alerting.alert import Alert
from lego_monitoring.config.enums import CheckSet
from lego_monitoring.core.checkers import BaseChecker
def remind_check(checker_sets: dict[CheckSet, list[Coroutine | BaseChecker]]) -> list[Alert]:
    """
    Gather the currently stored alerts of every checker that should be reminded about.

    Entries that are not BaseChecker instances (such as bare coroutines) are ignored,
    and so are checkers that are not persistent or have remind switched off.
    """
    reminded: list[Alert] = []
    for members in checker_sets.values():
        for member in members:
            # Only persistent checkers with remind enabled contribute their alerts.
            if isinstance(member, BaseChecker) and member.persistent and member.remind:
                reminded.extend(member.current_alerts)
    return reminded
# alert_num_by_state_with_max_type: dict[AlertType, list[Severity | int]] = {}
# for checker_set in checker_sets.values():
# for c in checker_set:
# if not isinstance(c, BaseChecker) or not c.persistent:
# continue
# for a in c.current_alerts:
# if a.alert_type not in alert_num_by_state_with_max_type:
# alert_num_by_state_with_max_type[a.alert_type] = [a.severity, 1]
# else:
# existing_list = alert_num_by_state_with_max_type[a.alert_type]
# if a.severity > existing_list[0]:
# existing_list[0] = a.severity
# existing_list[1] += 1
# if len(alert_num_by_state_with_max_type) == 0:
# return []
# message = "There are ongoing events:"
# for at, sev_count in alert_num_by_state_with_max_type.items():
# message += f"\n* {SEVERITY_TO_EMOJI[sev_count[0]]} {str(at)} - {sev_count[1]} alerts"
# message += (
# "\n\nUse /ongoing to see them or /status to see this short reminder again (NOT IMPLEMENTED YET)."
# + "\nYou will also be reminded daily until the situation is resolved."
# )
# alert = Alert(alert_type=AlertType.REMIND, message=message, severity=max(alert_num_by_state_with_max_type.keys()))
# return [alert]

View file

@ -4,6 +4,7 @@ from enum import StrEnum
class CheckSet(StrEnum): class CheckSet(StrEnum):
START = "start" START = "start"
STOP = "stop" STOP = "stop"
REMIND = "remind"
CPU = "cpu" CPU = "cpu"
RAM = "ram" RAM = "ram"

View file

@ -8,14 +8,36 @@ from ..alerting.alert import Alert
from ..alerting.channel import send_alert from ..alerting.channel import send_alert
from ..alerting.current import CurrentAlerts from ..alerting.current import CurrentAlerts
from ..alerting.enum import Severity from ..alerting.enum import Severity
from . import cvars
@dataclass @dataclass
class BaseChecker: class BaseChecker:
check: Callable | Coroutine check: Callable | Coroutine
persistent: bool persistent: bool
"""
Whether this checker remembers its last alerts.
Logically, persistent alerts show the system's ongoing state, rather than one-time events
"""
send_any_state: bool = False send_any_state: bool = False
"""
False: this persistent checker only emits messages when its max alert severity is changed
True: this persistent checker emits messages every time it checks
Has no effect if persistent == False
"""
remind: bool = True
"""
False: this persistent checker's last alerts are not reminded daily
True: this persistent checker's last alerts are reminded daily
Has no effect if persistent == False
"""
check_args: list = field(default_factory=list) check_args: list = field(default_factory=list)
check_kwargs: dict[str, Any] = field(default_factory=dict) check_kwargs: dict[str, Any] = field(default_factory=dict)
current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False) current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False)
@ -41,7 +63,7 @@ class BaseChecker:
new_types = self.current_alerts.get_types() new_types = self.current_alerts.get_types()
if old_severity != new_severity or self.send_any_state: if old_severity != new_severity or self.send_any_state:
for alert in alerts: for alert in alerts:
await send_alert(alert, persistent=True) await send_alert(alert, note="ongoing")
for alert_type in old_types - new_types: for alert_type in old_types - new_types:
alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK) alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK)
await send_alert(alert) await send_alert(alert)

View file

@ -2,7 +2,10 @@ from contextvars import ContextVar
from telethon import TelegramClient from telethon import TelegramClient
from lego_monitoring.alerting.current import CurrentAlerts
from ..config import Config from ..config import Config
config: ContextVar[Config] = ContextVar("config") config: ContextVar[Config] = ContextVar("config")
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client") tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
current_alerts: ContextVar[list[CurrentAlerts]] = ContextVar("current_alerts", default=[])