mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
remind about persistent alerts
This commit is contained in:
parent
2c234b2fd0
commit
f691180e9b
9 changed files with 115 additions and 30 deletions
|
|
@ -27,6 +27,7 @@ in
|
||||||
type = lib.types.listOf (lib.types.enum [
|
type = lib.types.listOf (lib.types.enum [
|
||||||
"start"
|
"start"
|
||||||
"stop"
|
"stop"
|
||||||
|
"remind"
|
||||||
|
|
||||||
"cpu"
|
"cpu"
|
||||||
"ram"
|
"ram"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import signal
|
import signal
|
||||||
|
from typing import Coroutine
|
||||||
|
|
||||||
from . import checks
|
from . import checks
|
||||||
from .alerting import channel
|
from .alerting import channel
|
||||||
|
|
@ -10,7 +11,7 @@ from .checks.temp.sensors import print_readings
|
||||||
from .config import enums as config_enums
|
from .config import enums as config_enums
|
||||||
from .config import load_config
|
from .config import load_config
|
||||||
from .core import cvars
|
from .core import cvars
|
||||||
from .core.checkers import IntervalChecker
|
from .core.checkers import BaseChecker, IntervalChecker, ScheduledChecker
|
||||||
|
|
||||||
stopping = False
|
stopping = False
|
||||||
|
|
||||||
|
|
@ -52,25 +53,35 @@ async def async_main():
|
||||||
|
|
||||||
check_sets = config_enums.CheckSet
|
check_sets = config_enums.CheckSet
|
||||||
|
|
||||||
checker_sets = {
|
checker_sets: dict[config_enums.CheckSet, list[Coroutine | BaseChecker]] = {
|
||||||
check_sets.START: [channel.send_start_alert()],
|
check_sets.START: [channel.send_start_alert()],
|
||||||
check_sets.STOP: [], # this is checked later
|
check_sets.STOP: [], # this is checked later
|
||||||
check_sets.CPU: [
|
check_sets.CPU: [IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True)],
|
||||||
IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True).run_checker()
|
check_sets.RAM: [IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True)],
|
||||||
],
|
check_sets.TEMP: [IntervalChecker(checks.temp_check, interval=datetime.timedelta(minutes=5), persistent=True)],
|
||||||
check_sets.RAM: [
|
|
||||||
IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True).run_checker()
|
|
||||||
],
|
|
||||||
check_sets.TEMP: [
|
|
||||||
IntervalChecker(checks.temp_check, interval=datetime.timedelta(minutes=5), persistent=True).run_checker()
|
|
||||||
],
|
|
||||||
check_sets.VULNIX: [
|
check_sets.VULNIX: [
|
||||||
IntervalChecker(
|
IntervalChecker(
|
||||||
checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_any_state=True
|
checks.vulnix_check,
|
||||||
).run_checker()
|
interval=datetime.timedelta(days=3),
|
||||||
|
persistent=True,
|
||||||
|
send_any_state=True,
|
||||||
|
# As those are checked less often than daily, reminds could lead to awkward situations
|
||||||
|
# when the vuln is fixed but you still get reminders about it for 2 more days.
|
||||||
|
remind=False,
|
||||||
|
)
|
||||||
|
],
|
||||||
|
check_sets.REMIND: [
|
||||||
|
ScheduledChecker(
|
||||||
|
checks.remind_check,
|
||||||
|
period=datetime.timedelta(days=1),
|
||||||
|
when=datetime.time(hour=0, minute=0),
|
||||||
|
persistent=False,
|
||||||
|
)
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
checker_sets[check_sets.REMIND][0].check_args = [checker_sets]
|
||||||
|
|
||||||
checkers = []
|
checkers = []
|
||||||
for enabled_set in config.enabled_check_sets:
|
for enabled_set in config.enabled_check_sets:
|
||||||
for checker in checker_sets[enabled_set]:
|
for checker in checker_sets[enabled_set]:
|
||||||
|
|
@ -81,6 +92,8 @@ async def async_main():
|
||||||
async with asyncio.TaskGroup() as tg:
|
async with asyncio.TaskGroup() as tg:
|
||||||
checker_tasks: set[asyncio.Task] = set()
|
checker_tasks: set[asyncio.Task] = set()
|
||||||
for c in checkers:
|
for c in checkers:
|
||||||
|
if isinstance(c, BaseChecker):
|
||||||
|
c = c.run_checker()
|
||||||
task = tg.create_task(c)
|
task = tg.create_task(c)
|
||||||
checker_tasks.add(task)
|
checker_tasks.add(task)
|
||||||
while True:
|
while True:
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from telethon.sessions import MemorySession
|
||||||
|
|
||||||
from ..core import cvars
|
from ..core import cvars
|
||||||
from .alert import Alert
|
from .alert import Alert
|
||||||
from .enum import AlertType, Severity
|
from .enum import SEVERITY_TO_EMOJI, AlertType, Severity
|
||||||
|
|
||||||
|
|
||||||
async def get_client() -> TelegramClient:
|
async def get_client() -> TelegramClient:
|
||||||
|
|
@ -16,22 +16,14 @@ async def get_client() -> TelegramClient:
|
||||||
return client
|
return client
|
||||||
|
|
||||||
|
|
||||||
def format_message(alert: Alert, persistent: bool) -> str:
|
def format_message(alert: Alert, note: str) -> str:
|
||||||
match alert.severity:
|
severity_emoji = SEVERITY_TO_EMOJI[alert.severity]
|
||||||
case Severity.OK:
|
note_formatted = f" - <i>{note}</i>" if note else ""
|
||||||
severity_emoji = "🟢"
|
message = f"{severity_emoji} {alert.alert_type} Alert{note_formatted}\n{alert.message}"
|
||||||
case Severity.INFO:
|
|
||||||
severity_emoji = "ℹ️"
|
|
||||||
case Severity.WARNING:
|
|
||||||
severity_emoji = "⚠️"
|
|
||||||
case Severity.CRITICAL:
|
|
||||||
severity_emoji = "🆘"
|
|
||||||
persistent_marker = " - <i>ongoing</i>" if persistent else ""
|
|
||||||
message = f"{severity_emoji} {alert.alert_type} Alert{persistent_marker}\n{alert.message}"
|
|
||||||
return message
|
return message
|
||||||
|
|
||||||
|
|
||||||
async def send_alert(alert: Alert, persistent: bool = False) -> None:
|
async def send_alert(alert: Alert, note: str = "") -> None:
|
||||||
try:
|
try:
|
||||||
client = cvars.tg_client.get()
|
client = cvars.tg_client.get()
|
||||||
except LookupError: # being called standalone
|
except LookupError: # being called standalone
|
||||||
|
|
@ -43,7 +35,7 @@ async def send_alert(alert: Alert, persistent: bool = False) -> None:
|
||||||
else:
|
else:
|
||||||
... # temp_client = False
|
... # temp_client = False
|
||||||
room_id = cvars.config.get().telegram.room_id
|
room_id = cvars.config.get().telegram.room_id
|
||||||
message = format_message(alert, persistent)
|
message = format_message(alert, note)
|
||||||
await client.send_message(entity=room_id, message=message)
|
await client.send_message(entity=room_id, message=message)
|
||||||
# if temp_client:
|
# if temp_client:
|
||||||
# await client.close()
|
# await client.close()
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ class AlertType(StrEnum):
|
||||||
TEMP = "TEMP"
|
TEMP = "TEMP"
|
||||||
TEST = "TEST"
|
TEST = "TEST"
|
||||||
VULN = "VULN"
|
VULN = "VULN"
|
||||||
|
REMIND = "REMIND"
|
||||||
# LOGIN = "LOGIN"
|
# LOGIN = "LOGIN"
|
||||||
# SMART = "SMART" # TODO
|
# SMART = "SMART" # TODO
|
||||||
# RAID = "RAID"
|
# RAID = "RAID"
|
||||||
|
|
@ -22,3 +23,11 @@ class Severity(IntEnum):
|
||||||
INFO = 1
|
INFO = 1
|
||||||
WARNING = 2
|
WARNING = 2
|
||||||
CRITICAL = 3
|
CRITICAL = 3
|
||||||
|
|
||||||
|
|
||||||
|
SEVERITY_TO_EMOJI = {
|
||||||
|
Severity.OK: "🟢",
|
||||||
|
Severity.INFO: "ℹ️",
|
||||||
|
Severity.WARNING: "⚠️",
|
||||||
|
Severity.CRITICAL: "🆘",
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
from .cpu import cpu_check
|
from .cpu import cpu_check
|
||||||
from .ram import ram_check
|
from .ram import ram_check
|
||||||
|
from .remind import remind_check
|
||||||
from .temp import temp_check
|
from .temp import temp_check
|
||||||
from .vulnix import vulnix_check
|
from .vulnix import vulnix_check
|
||||||
|
|
|
||||||
43
src/lego_monitoring/checks/remind.py
Normal file
43
src/lego_monitoring/checks/remind.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
from typing import Any, Coroutine
|
||||||
|
|
||||||
|
from lego_monitoring.alerting.alert import Alert
|
||||||
|
from lego_monitoring.config.enums import CheckSet
|
||||||
|
from lego_monitoring.core.checkers import BaseChecker
|
||||||
|
|
||||||
|
|
||||||
|
def remind_check(checker_sets: dict[CheckSet, list[Coroutine | BaseChecker]]) -> list[Alert]:
|
||||||
|
alerts = []
|
||||||
|
for checker_set in checker_sets.values():
|
||||||
|
for c in checker_set:
|
||||||
|
if not isinstance(c, BaseChecker) or not c.persistent or not c.remind:
|
||||||
|
continue
|
||||||
|
alerts.extend(c.current_alerts)
|
||||||
|
return alerts
|
||||||
|
|
||||||
|
# alert_num_by_state_with_max_type: dict[AlertType, list[Severity | int]] = {}
|
||||||
|
# for checker_set in checker_sets.values():
|
||||||
|
# for c in checker_set:
|
||||||
|
# if not isinstance(c, BaseChecker) or not c.persistent:
|
||||||
|
# continue
|
||||||
|
# for a in c.current_alerts:
|
||||||
|
# if a.alert_type not in alert_num_by_state_with_max_type:
|
||||||
|
# alert_num_by_state_with_max_type[a.alert_type] = [a.severity, 1]
|
||||||
|
# else:
|
||||||
|
# existing_list = alert_num_by_state_with_max_type[a.alert_type]
|
||||||
|
# if a.severity > existing_list[0]:
|
||||||
|
# existing_list[0] = a.severity
|
||||||
|
# existing_list[1] += 1
|
||||||
|
|
||||||
|
# if len(alert_num_by_state_with_max_type) == 0:
|
||||||
|
# return []
|
||||||
|
|
||||||
|
# message = "There are ongoing events:"
|
||||||
|
# for at, sev_count in alert_num_by_state_with_max_type.items():
|
||||||
|
# message += f"\n* {SEVERITY_TO_EMOJI[sev_count[0]]} {str(at)} - {sev_count[1]} alerts"
|
||||||
|
# message += (
|
||||||
|
# "\n\nUse /ongoing to see them or /status to see this short reminder again (NOT IMPLEMENTED YET)."
|
||||||
|
# + "\nYou will also be reminded daily until the situation is resolved."
|
||||||
|
# )
|
||||||
|
|
||||||
|
# alert = Alert(alert_type=AlertType.REMIND, message=message, severity=max(alert_num_by_state_with_max_type.keys()))
|
||||||
|
# return [alert]
|
||||||
|
|
@ -4,6 +4,7 @@ from enum import StrEnum
|
||||||
class CheckSet(StrEnum):
|
class CheckSet(StrEnum):
|
||||||
START = "start"
|
START = "start"
|
||||||
STOP = "stop"
|
STOP = "stop"
|
||||||
|
REMIND = "remind"
|
||||||
|
|
||||||
CPU = "cpu"
|
CPU = "cpu"
|
||||||
RAM = "ram"
|
RAM = "ram"
|
||||||
|
|
|
||||||
|
|
@ -8,14 +8,36 @@ from ..alerting.alert import Alert
|
||||||
from ..alerting.channel import send_alert
|
from ..alerting.channel import send_alert
|
||||||
from ..alerting.current import CurrentAlerts
|
from ..alerting.current import CurrentAlerts
|
||||||
from ..alerting.enum import Severity
|
from ..alerting.enum import Severity
|
||||||
from . import cvars
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BaseChecker:
|
class BaseChecker:
|
||||||
check: Callable | Coroutine
|
check: Callable | Coroutine
|
||||||
|
|
||||||
persistent: bool
|
persistent: bool
|
||||||
|
"""
|
||||||
|
Whether this checker remembers its last alerts.
|
||||||
|
Logically, persistent alerts show the system's ongoing state, rather than one-time events
|
||||||
|
"""
|
||||||
|
|
||||||
send_any_state: bool = False
|
send_any_state: bool = False
|
||||||
|
"""
|
||||||
|
False: this persistent checker only emits messages when its max alert severity is changed
|
||||||
|
|
||||||
|
True: this persistent checker emits messages every time it checks
|
||||||
|
|
||||||
|
Has no effect if persistent == False
|
||||||
|
"""
|
||||||
|
|
||||||
|
remind: bool = True
|
||||||
|
"""
|
||||||
|
False: this persistent checker's last alerts are not reminded daily
|
||||||
|

|
||||||
True: this persistent checker's last alerts are reminded daily
|
||||||
|
|
||||||
|
Has no effect if persistent == False
|
||||||
|
"""
|
||||||
|
|
||||||
check_args: list = field(default_factory=list)
|
check_args: list = field(default_factory=list)
|
||||||
check_kwargs: dict[str, Any] = field(default_factory=dict)
|
check_kwargs: dict[str, Any] = field(default_factory=dict)
|
||||||
current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False)
|
current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False)
|
||||||
|
|
@ -41,7 +63,7 @@ class BaseChecker:
|
||||||
new_types = self.current_alerts.get_types()
|
new_types = self.current_alerts.get_types()
|
||||||
if old_severity != new_severity or self.send_any_state:
|
if old_severity != new_severity or self.send_any_state:
|
||||||
for alert in alerts:
|
for alert in alerts:
|
||||||
await send_alert(alert, persistent=True)
|
await send_alert(alert, note="ongoing")
|
||||||
for alert_type in old_types - new_types:
|
for alert_type in old_types - new_types:
|
||||||
alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK)
|
alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK)
|
||||||
await send_alert(alert)
|
await send_alert(alert)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,10 @@ from contextvars import ContextVar
|
||||||
|
|
||||||
from telethon import TelegramClient
|
from telethon import TelegramClient
|
||||||
|
|
||||||
|
from lego_monitoring.alerting.current import CurrentAlerts
|
||||||
|
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
|
|
||||||
config: ContextVar[Config] = ContextVar("config")
|
config: ContextVar[Config] = ContextVar("config")
|
||||||
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
||||||
|
current_alerts: ContextVar[list[CurrentAlerts]] = ContextVar("current_alerts", default=[])
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue