mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
persistent alerts
This commit is contained in:
parent
eef6ec59b0
commit
2c234b2fd0
5 changed files with 31 additions and 21 deletions
|
|
@ -56,7 +56,7 @@ async def async_main():
|
||||||
check_sets.START: [channel.send_start_alert()],
|
check_sets.START: [channel.send_start_alert()],
|
||||||
check_sets.STOP: [], # this is checked later
|
check_sets.STOP: [], # this is checked later
|
||||||
check_sets.CPU: [
|
check_sets.CPU: [
|
||||||
IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=5), persistent=True).run_checker()
|
IntervalChecker(checks.cpu_check, interval=datetime.timedelta(minutes=3), persistent=True).run_checker()
|
||||||
],
|
],
|
||||||
check_sets.RAM: [
|
check_sets.RAM: [
|
||||||
IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True).run_checker()
|
IntervalChecker(checks.ram_check, interval=datetime.timedelta(minutes=1), persistent=True).run_checker()
|
||||||
|
|
@ -66,7 +66,7 @@ async def async_main():
|
||||||
],
|
],
|
||||||
check_sets.VULNIX: [
|
check_sets.VULNIX: [
|
||||||
IntervalChecker(
|
IntervalChecker(
|
||||||
checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_same_state=True
|
checks.vulnix_check, interval=datetime.timedelta(days=3), persistent=True, send_any_state=True
|
||||||
).run_checker()
|
).run_checker()
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ async def get_client() -> TelegramClient:
|
||||||
return client
|
return client
|
||||||
|
|
||||||
|
|
||||||
def format_message(alert: Alert) -> str:
|
def format_message(alert: Alert, persistent: bool) -> str:
|
||||||
match alert.severity:
|
match alert.severity:
|
||||||
case Severity.OK:
|
case Severity.OK:
|
||||||
severity_emoji = "🟢"
|
severity_emoji = "🟢"
|
||||||
|
|
@ -26,11 +26,12 @@ def format_message(alert: Alert) -> str:
|
||||||
severity_emoji = "⚠️"
|
severity_emoji = "⚠️"
|
||||||
case Severity.CRITICAL:
|
case Severity.CRITICAL:
|
||||||
severity_emoji = "🆘"
|
severity_emoji = "🆘"
|
||||||
message = f"{severity_emoji} {alert.alert_type} Alert\n{alert.message}"
|
persistent_marker = " - <i>ongoing</i>" if persistent else ""
|
||||||
|
message = f"{severity_emoji} {alert.alert_type} Alert{persistent_marker}\n{alert.message}"
|
||||||
return message
|
return message
|
||||||
|
|
||||||
|
|
||||||
async def send_alert(alert: Alert) -> None:
|
async def send_alert(alert: Alert, persistent: bool = False) -> None:
|
||||||
try:
|
try:
|
||||||
client = cvars.tg_client.get()
|
client = cvars.tg_client.get()
|
||||||
except LookupError: # being called standalone
|
except LookupError: # being called standalone
|
||||||
|
|
@ -42,7 +43,7 @@ async def send_alert(alert: Alert) -> None:
|
||||||
else:
|
else:
|
||||||
... # temp_client = False
|
... # temp_client = False
|
||||||
room_id = cvars.config.get().telegram.room_id
|
room_id = cvars.config.get().telegram.room_id
|
||||||
message = format_message(alert)
|
message = format_message(alert, persistent)
|
||||||
await client.send_message(entity=room_id, message=message)
|
await client.send_message(entity=room_id, message=message)
|
||||||
# if temp_client:
|
# if temp_client:
|
||||||
# await client.close()
|
# await client.close()
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ class CurrentAlerts(list[Alert]):
|
||||||
for a in self:
|
for a in self:
|
||||||
if max_severity is None or a.severity > max_severity:
|
if max_severity is None or a.severity > max_severity:
|
||||||
max_severity = a.severity
|
max_severity = a.severity
|
||||||
return a.severity
|
return max_severity
|
||||||
|
|
||||||
def get_types(self) -> set[AlertType]:
|
def get_types(self) -> set[AlertType]:
|
||||||
types = set()
|
types = set()
|
||||||
|
|
|
||||||
|
|
@ -6,15 +6,19 @@ from typing import Any, Callable, Coroutine
|
||||||
|
|
||||||
from ..alerting.alert import Alert
|
from ..alerting.alert import Alert
|
||||||
from ..alerting.channel import send_alert
|
from ..alerting.channel import send_alert
|
||||||
|
from ..alerting.current import CurrentAlerts
|
||||||
|
from ..alerting.enum import Severity
|
||||||
|
from . import cvars
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BaseChecker:
|
class BaseChecker:
|
||||||
check: Callable | Coroutine
|
check: Callable | Coroutine
|
||||||
persistent: bool
|
persistent: bool
|
||||||
send_same_state: bool = False
|
send_any_state: bool = False
|
||||||
check_args: list = field(default_factory=list)
|
check_args: list = field(default_factory=list)
|
||||||
check_kwargs: dict[str, Any] = field(default_factory=dict)
|
check_kwargs: dict[str, Any] = field(default_factory=dict)
|
||||||
|
current_alerts: CurrentAlerts = field(default_factory=CurrentAlerts, init=False)
|
||||||
|
|
||||||
async def _call_check(self) -> list[Alert]:
|
async def _call_check(self) -> list[Alert]:
|
||||||
if isinstance(self.check, Callable):
|
if isinstance(self.check, Callable):
|
||||||
|
|
@ -27,13 +31,22 @@ class BaseChecker:
|
||||||
raise TypeError(f"check is {type(self.check)}, neither function nor coroutine")
|
raise TypeError(f"check is {type(self.check)}, neither function nor coroutine")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
async def _handle_alert(alert: Alert, persistent: bool, send_same_state: bool) -> None:
|
async def _handle_alerts(self, alerts: list[Alert]) -> None:
|
||||||
if not persistent:
|
if not self.persistent:
|
||||||
|
for alert in alerts:
|
||||||
await send_alert(alert)
|
await send_alert(alert)
|
||||||
return
|
return
|
||||||
...
|
old_types = self.current_alerts.get_types()
|
||||||
|
old_severity, new_severity = self.current_alerts.update(alerts)
|
||||||
|
new_types = self.current_alerts.get_types()
|
||||||
|
if old_severity != new_severity or self.send_any_state:
|
||||||
|
for alert in alerts:
|
||||||
|
await send_alert(alert, persistent=True)
|
||||||
|
for alert_type in old_types - new_types:
|
||||||
|
alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK)
|
||||||
|
await send_alert(alert)
|
||||||
|
|
||||||
async def run_checker(self):
|
async def run_checker(self) -> None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -42,14 +55,13 @@ class IntervalChecker(BaseChecker):
|
||||||
_: KW_ONLY
|
_: KW_ONLY
|
||||||
interval: datetime.timedelta
|
interval: datetime.timedelta
|
||||||
|
|
||||||
async def run_checker(self):
|
async def run_checker(self) -> None:
|
||||||
interval_secs = self.interval.total_seconds()
|
interval_secs = self.interval.total_seconds()
|
||||||
while True:
|
while True:
|
||||||
logging.info(f"Calling {self.check.__name__}")
|
logging.info(f"Calling {self.check.__name__}")
|
||||||
result = await self._call_check()
|
result = await self._call_check()
|
||||||
logging.info(f"Got {len(result)} alerts")
|
logging.info(f"Got {len(result)} alerts")
|
||||||
for alert in result:
|
await self._handle_alerts(result)
|
||||||
await send_alert(alert)
|
|
||||||
await asyncio.sleep(interval_secs)
|
await asyncio.sleep(interval_secs)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -59,7 +71,7 @@ class ScheduledChecker(BaseChecker):
|
||||||
period: datetime.timedelta
|
period: datetime.timedelta
|
||||||
when: datetime.time
|
when: datetime.time
|
||||||
|
|
||||||
async def run_checker(self):
|
async def run_checker(self) -> None:
|
||||||
match self.period:
|
match self.period:
|
||||||
case datetime.timedelta(days=1):
|
case datetime.timedelta(days=1):
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -75,7 +87,6 @@ class ScheduledChecker(BaseChecker):
|
||||||
logging.info(f"Calling {self.check.__name__}")
|
logging.info(f"Calling {self.check.__name__}")
|
||||||
result = await self._call_check()
|
result = await self._call_check()
|
||||||
logging.info(f"Got {len(result)} alerts")
|
logging.info(f"Got {len(result)} alerts")
|
||||||
for alert in result:
|
await self._handle_alerts(result)
|
||||||
await send_alert(alert)
|
|
||||||
case _:
|
case _:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,7 @@ from contextvars import ContextVar
|
||||||
|
|
||||||
from telethon import TelegramClient
|
from telethon import TelegramClient
|
||||||
|
|
||||||
from ..alerting.current import CurrentAlerts
|
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
|
|
||||||
config: ContextVar[Config] = ContextVar("config")
|
config: ContextVar[Config] = ContextVar("config")
|
||||||
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
tg_client: ContextVar[TelegramClient] = ContextVar("tg_client")
|
||||||
current_alerts: ContextVar[list[CurrentAlerts]] = ContextVar("current_alerts", default=[])
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue