mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
remind about persistent alerts
This commit is contained in:
parent
2c234b2fd0
commit
f691180e9b
9 changed files with 115 additions and 30 deletions
|
|
@ -3,6 +3,7 @@ import asyncio
|
|||
import datetime
|
||||
import logging
|
||||
import signal
|
||||
from typing import Coroutine
|
||||
|
||||
from . import checks
|
||||
from .alerting import channel
|
||||
|
|
@ -10,7 +11,7 @@ from .checks.temp.sensors import print_readings
|
|||
from .config import enums as config_enums
|
||||
from .config import load_config
|
||||
from .core import cvars
|
||||
from .core.checkers import IntervalChecker
|
||||
from .core.checkers import BaseChecker, IntervalChecker, ScheduledChecker
|
||||
|
||||
stopping = False
|
||||
|
||||
|
|
@ -52,25 +53,35 @@ async def async_main():
|
|||
|
||||
check_sets = config_enums.CheckSet
|
||||
|
||||
checker_sets = {
|
||||
checker_sets: dict[config_enums.CheckSet, list[Coroutine | BaseChecker]] = {
|
||||
check_sets.START: [channel.send_start_alert()],
|
||||
check_sets.STOP: [], # this is checked later
|
||||
check_sets.CPU: [
|
||||
IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True).run_checker()
|
||||
],
|
||||
check_sets.RAM: [
|
||||
IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True).run_checker()
|
||||
],
|
||||
check_sets.TEMP: [
|
||||
IntervalChecker(checks.temp_check, interval=datetime.timedelta(minutes=5), persistent=True).run_checker()
|
||||
],
|
||||
check_sets.CPU: [IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True)],
|
||||
check_sets.RAM: [IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True)],
|
||||
check_sets.TEMP: [IntervalChecker(checks.temp_check, interval=datetime.timedelta(minutes=5), persistent=True)],
|
||||
check_sets.VULNIX: [
|
||||
IntervalChecker(
|
||||
checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_any_state=True
|
||||
).run_checker()
|
||||
checks.vulnix_check,
|
||||
interval=datetime.timedelta(days=3),
|
||||
persistent=True,
|
||||
send_any_state=True,
|
||||
# As those are checked less often than daily, reminds could lead to awkward situations
|
||||
# when the vuln is fixed but you still get reminders about it for 2 more days.
|
||||
remind=False,
|
||||
)
|
||||
],
|
||||
check_sets.REMIND: [
|
||||
ScheduledChecker(
|
||||
checks.remind_check,
|
||||
period=datetime.timedelta(days=1),
|
||||
when=datetime.time(hour=0, minute=0),
|
||||
persistent=False,
|
||||
)
|
||||
],
|
||||
}
|
||||
|
||||
checker_sets[check_sets.REMIND][0].check_args = [checker_sets]
|
||||
|
||||
checkers = []
|
||||
for enabled_set in config.enabled_check_sets:
|
||||
for checker in checker_sets[enabled_set]:
|
||||
|
|
@ -81,6 +92,8 @@ async def async_main():
|
|||
async with asyncio.TaskGroup() as tg:
|
||||
checker_tasks: set[asyncio.Task] = set()
|
||||
for c in checkers:
|
||||
if isinstance(c, BaseChecker):
|
||||
c = c.run_checker()
|
||||
task = tg.create_task(c)
|
||||
checker_tasks.add(task)
|
||||
while True:
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from telethon.sessions import MemorySession
|
|||
|
||||
from ..core import cvars
|
||||
from .alert import Alert
|
||||
from .enum import AlertType, Severity
|
||||
from .enum import SEVERITY_TO_EMOJI, AlertType, Severity
|
||||
|
||||
|
||||
async def get_client() -> TelegramClient:
|
||||
|
|
@ -16,22 +16,14 @@ async def get_client() -> TelegramClient:
|
|||
return client
|
||||
|
||||
|
||||
def format_message(alert: Alert, persistent: bool) -> str:
|
||||
match alert.severity:
|
||||
case Severity.OK:
|
||||
severity_emoji = "🟢"
|
||||
case Severity.INFO:
|
||||
severity_emoji = "ℹ️"
|
||||
case Severity.WARNING:
|
||||
severity_emoji = "⚠️"
|
||||
case Severity.CRITICAL:
|
||||
severity_emoji = "🆘"
|
||||
persistent_marker = " - <i>ongoing</i>" if persistent else ""
|
||||
message = f"{severity_emoji} {alert.alert_type} Alert{persistent_marker}\n{alert.message}"
|
||||
def format_message(alert: Alert, note: str) -> str:
    """Render an alert as the text of a chat message.

    The first line holds the emoji looked up for the alert's severity, the
    alert type and the word "Alert"; when ``note`` is non-empty it is appended
    to that line in italics. The alert's own message follows on the next line.
    """
    emoji = SEVERITY_TO_EMOJI[alert.severity]
    if note:
        suffix = f" - <i>{note}</i>"
    else:
        suffix = ""
    return f"{emoji} {alert.alert_type} Alert{suffix}\n{alert.message}"
|
||||
|
||||
|
||||
async def send_alert(alert: Alert, persistent: bool = False) -> None:
|
||||
async def send_alert(alert: Alert, note: str = "") -> None:
|
||||
try:
|
||||
client = cvars.tg_client.get()
|
||||
except LookupError: # being called standalone
|
||||
|
|
@ -43,7 +35,7 @@ async def send_alert(alert: Alert, persistent: bool = False) -> None:
|
|||
else:
|
||||
... # temp_client = False
|
||||
room_id = cvars.config.get().telegram.room_id
|
||||
message = format_message(alert, persistent)
|
||||
message = format_message(alert, note)
|
||||
await client.send_message(entity=room_id, message=message)
|
||||
# if temp_client:
|
||||
# await client.close()
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ class AlertType(StrEnum):
|
|||
TEMP = "TEMP"
|
||||
TEST = "TEST"
|
||||
VULN = "VULN"
|
||||
REMIND = "REMIND"
|
||||
# LOGIN = "LOGIN"
|
||||
# SMART = "SMART" # TODO
|
||||
# RAID = "RAID"
|
||||
|
|
@ -22,3 +23,11 @@ class Severity(IntEnum):
|
|||
INFO = 1
|
||||
WARNING = 2
|
||||
CRITICAL = 3
|
||||
|
||||
|
||||
SEVERITY_TO_EMOJI = {
|
||||
Severity.OK: "🟢",
|
||||
Severity.INFO: "ℹ️",
|
||||
Severity.WARNING: "⚠️",
|
||||
Severity.CRITICAL: "🆘",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from .cpu import cpu_check
|
||||
from .ram import ram_check
|
||||
from .remind import remind_check
|
||||
from .temp import temp_check
|
||||
from .vulnix import vulnix_check
|
||||
|
|
|
|||
43
src/lego_monitoring/checks/remind.py
Normal file
43
src/lego_monitoring/checks/remind.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
from typing import Any, Coroutine
|
||||
|
||||
from lego_monitoring.alerting.alert import Alert
|
||||
from lego_monitoring.config.enums import CheckSet
|
||||
from lego_monitoring.core.checkers import BaseChecker
|
||||
|
||||
|
||||
def remind_check(checker_sets: dict[CheckSet, list[Coroutine | BaseChecker]]) -> list[Alert]:
    """Gather the current alerts of every checker that should be reminded about.

    Entries that are not ``BaseChecker`` instances are ignored, as are checkers
    whose ``persistent`` or ``remind`` flag is falsy. The alerts of the
    remaining checkers are returned in iteration order as one flat list.
    """
    return [
        alert
        for group in checker_sets.values()
        for checker in group
        if isinstance(checker, BaseChecker) and checker.persistent and checker.remind
        for alert in checker.current_alerts
    ]
|
||||
|
||||
# alert_num_by_state_with_max_type: dict[AlertType, list[Severity | int]] = {}
|
||||
# for checker_set in checker_sets.values():
|
||||
# for c in checker_set:
|
||||
# if not isinstance(c, BaseChecker) or not c.persistent:
|
||||
# continue
|
||||
# for a in c.current_alerts:
|
||||
# if a.alert_type not in alert_num_by_state_with_max_type:
|
||||
# alert_num_by_state_with_max_type[a.alert_type] = [a.severity, 1]
|
||||
# else:
|
||||
# existing_list = alert_num_by_state_with_max_type[a.alert_type]
|
||||
# if a.severity > existing_list[0]:
|
||||
# existing_list[0] = a.severity
|
||||
# existing_list[1] += 1
|
||||
|
||||
# if len(alert_num_by_state_with_max_type) == 0:
|
||||
# return []
|
||||
|
||||
# message = "There are ongoing events:"
|
||||
# for at, sev_count in alert_num_by_state_with_max_type.items():
|
||||
# message += f"\n* {SEVERITY_TO_EMOJI[sev_count[0]]} {str(at)} - {sev_count[1]} alerts"
|
||||
# message += (
|
||||
# "\n\nUse /ongoing to see them or /status to see this short reminder again (NOT IMPLEMENTED YET)."
|
||||
# + "\nYou will also be reminded daily until the situation is resolved."
|
||||
# )
|
||||
|
||||
# alert = Alert(alert_type=AlertType.REMIND, message=message, severity=max(alert_num_by_state_with_max_type.keys()))
|
||||
# return [alert]
|
||||
|
|
@ -4,6 +4,7 @@ from enum import StrEnum
|
|||
class CheckSet(StrEnum):
|
||||
START = "start"
|
||||
STOP = "stop"
|
||||
REMIND = "remind"
|
||||
|
||||
CPU = "cpu"
|
||||
RAM = "ram"
|
||||
|
|
|
|||
|
|
@ -8,14 +8,36 @@ from ..alerting.alert import Alert
|
|||
from ..alerting.channel import send_alert
|
||||
from ..alerting.current import CurrentAlerts
|
||||
from ..alerting.enum import Severity
|
||||
from . import cvars
|
||||
|
||||
|
||||
@dataclass
|
||||
class BaseChecker:
|
||||
check: Callable | Coroutine
|
||||
|
||||
persistent: bool
|
||||
"""
|
||||
Whether this checker remembers its last alerts.
|
||||
Logically, persistent alerts show the system's ongoing state, rather than one-time events
|
||||
"""
|
||||
|
||||
send_any_state: bool = False
|
||||
"""
|
||||
False: this persistent checker only emits messages when its max alert severity is changed
|
||||
|
||||
True: this persistent checker emits messages every time it checks
|
||||
|
||||
Has no effect if persistent == False
|
||||
"""
|
||||
|
||||
remind: bool = True
|
||||
"""
|
||||
False: this persistent checker's last alerts are not reminded daily
|
||||
|
||||
True: this persistent checker's last alerts are reminded daily
|
||||
|
||||
Has no effect if persistent == False
|
||||
"""
|
||||
|
||||
check_args: list = field(default_factory=list)
|
||||
check_kwargs: dict[str, Any] = field(default_factory=dict)
|
||||
current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False)
|
||||
|
|
@ -41,7 +63,7 @@ class BaseChecker:
|
|||
new_types = self.current_alerts.get_types()
|
||||
if old_severity != new_severity or self.send_any_state:
|
||||
for alert in alerts:
|
||||
await send_alert(alert, persistent=True)
|
||||
await send_alert(alert, note="ongoing")
|
||||
for alert_type in old_types - new_types:
|
||||
alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK)
|
||||
await send_alert(alert)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,10 @@ from contextvars import ContextVar
|
|||
|
||||
from telethon import TelegramClient
|
||||
|
||||
from lego_monitoring.alerting.current import CurrentAlerts
|
||||
|
||||
from ..config import Config
|
||||
|
||||
config: ContextVar[Config] = ContextVar("config")
|
||||
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
||||
current_alerts: ContextVar[list[CurrentAlerts]] = ContextVar("current_alerts", default=[])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue