mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-12 13:45:20 +00:00
account for alert sending failing
This commit is contained in:
parent
40e30529eb
commit
10e79d6827
6 changed files with 84 additions and 32 deletions
|
|
@ -142,8 +142,9 @@ async def async_main():
|
|||
if stopping:
|
||||
if "self" in config.enabled_check_sets:
|
||||
alert = checks.generate_stop_alert()
|
||||
await sender.send_alert(alert)
|
||||
await sender.send_healthchecks_status(alert)
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
tg.create_task(sender.send_alert(alert))
|
||||
tg.create_task(sender.send_healthchecks_status(alert))
|
||||
for c in checkers:
|
||||
try:
|
||||
await c.graceful_stop()
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
import logging
|
||||
from socket import gethostname
|
||||
|
||||
import tenacity
|
||||
from returns.result import Failure, Success
|
||||
from telethon import TelegramClient
|
||||
from telethon.sessions import MemorySession
|
||||
from uplink import AiohttpClient
|
||||
|
||||
from ..checks.utils import format_for_healthchecks_slug
|
||||
from ..core import cvars
|
||||
from ..core.error_handling import log_errors_async
|
||||
from .alert import Alert
|
||||
from .clients.healthchecks import HealthchecksClient
|
||||
from .enum import SEVERITY_TO_EMOJI, Severity
|
||||
|
|
@ -38,27 +39,26 @@ def format_message(alert: Alert, note: str) -> str:
|
|||
return message
|
||||
|
||||
|
||||
async def send_alert(alert: Alert, note: str = "") -> Success[None] | Failure[tenacity.RetryError]:
    """Send ``alert`` via ``_send_alert`` and report the outcome instead of raising.

    The send coroutine is run through ``log_errors_async``, so an exception
    escaping it is logged and handed back wrapped in a ``Failure``.

    Args:
        alert: The alert to deliver.
        note: Optional note forwarded unchanged to ``_send_alert``.

    Returns:
        The result container produced by ``log_errors_async``.
    """
    # Return the container rather than discarding it, so the value actually
    # matches the declared return type and callers can inspect the outcome.
    return await log_errors_async(_send_alert(alert, note))
|
||||
|
||||
|
||||
async def send_healthchecks_status(alert: Alert) -> Success[None] | Failure[tenacity.RetryError]:
    """Run ``_send_healthchecks_status(alert)`` and report the outcome instead of raising.

    The coroutine is run through ``log_errors_async``, so an exception
    escaping it is logged and handed back wrapped in a ``Failure``.

    Args:
        alert: The alert whose status should be reported.

    Returns:
        The result container produced by ``log_errors_async``.
    """
    # Return the container rather than discarding it, so the value actually
    # matches the declared return type and callers can inspect the outcome.
    return await log_errors_async(_send_healthchecks_status(alert))
|
||||
|
||||
|
||||
@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60))
async def _send_alert(alert: Alert, note: str = "") -> None:
    """Post ``alert`` to the configured Telegram room, retrying when the send raises.

    The ``tenacity.retry`` decorator re-invokes this coroutine with randomised
    exponential back-off (``multiplier=1``, ``max=60``) whenever it raises.
    NOTE(review): no ``stop=`` condition is passed, so tenacity's default stop
    policy applies - confirm that unbounded retrying is intended.

    Args:
        alert: The alert to deliver.
        note: Extra note passed to ``format_message`` together with the alert.
    """
    logging.debug(f"Sending {alert.alert_type} alert to Telegram")

    # Single context-variable lookup. If the variable was never set, the
    # LookupError is left to propagate instead of being turned into a
    # NotImplementedError, which the retry decorator would only retry.
    tg_client = cvars.tg_client.get()
    if tg_client is None:
        # No Telegram client available: nothing to send.
        return

    room_id = cvars.config.get().alert_channels.telegram.room_id
    message = format_message(alert, note)
    await tg_client.send_message(entity=room_id, message=message)
|
||||
|
||||
|
||||
async def send_healthchecks_status(alert: Alert) -> None:
|
||||
@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60))
|
||||
async def _send_healthchecks_status(alert: Alert) -> None:
|
||||
def get_pinging_key(keys: dict[str, str]):
|
||||
if alert.healthchecks_slug in keys:
|
||||
return keys[alert.healthchecks_slug]
|
||||
|
|
|
|||
|
|
@ -70,21 +70,22 @@ class BaseChecker:
|
|||
return result
|
||||
|
||||
async def _handle_alerts(self, alerts: list[Alert]) -> None:
    """Schedule healthchecks pings and alert messages for ``alerts`` concurrently.

    Every send is created as a task in one ``asyncio.TaskGroup``; leaving the
    ``async with`` block, including through the early ``return``, waits for
    all scheduled tasks to finish.

    Args:
        alerts: The alerts to report.
    """
    async with asyncio.TaskGroup() as tg:
        # Healthchecks status is only reported on non-reminder runs.
        if not self.is_reminder:
            for alert in alerts:
                tg.create_task(send_healthchecks_status(alert))

        if not self.persistent:
            # Non-persistent checkers send every non-OK alert; on reminder
            # runs the message carries the "ongoing" note.
            for alert in alerts:
                if alert.severity != Severity.OK:
                    tg.create_task(send_alert(alert, "ongoing" if self.is_reminder else ""))
            return

        # Persistent checkers: send when the severity returned by
        # ``current_alerts.update`` changed (or ``send_any_state`` is set),
        # except for a transition from ``None`` straight to OK.
        old_severity, new_severity = self.current_alerts.update(alerts)
        if (old_severity != new_severity or self.send_any_state) and not (
            old_severity is None and new_severity == Severity.OK
        ):
            for alert in alerts:
                tg.create_task(send_alert(alert, note="ongoing"))
|
||||
|
||||
async def run_checker(self) -> None:
    """Placeholder entry point that always raises ``NotImplementedError``.

    NOTE(review): presumably meant to be overridden by concrete checkers - confirm.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
|
||||
|
|
|
|||
23
src/lego_monitoring/core/error_handling.py
Normal file
23
src/lego_monitoring/core/error_handling.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import logging
|
||||
import traceback
|
||||
from typing import Awaitable, Callable, TypeVar
|
||||
|
||||
from returns.result import Failure, Success
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def log_errors(function: Callable[..., T], *args, **kwargs) -> Success[T] | Failure[Exception]:
    """Call ``function`` and capture its outcome in a result container.

    Args:
        function: The callable to invoke.
        *args: Positional arguments passed through to ``function``.
        **kwargs: Keyword arguments passed through to ``function``.

    Returns:
        ``Success`` wrapping the return value, or ``Failure`` wrapping the
        exception when ``function`` raised. In the failure case the formatted
        traceback is logged at ERROR level first.
    """
    try:
        # Unpack the collected arguments: passing ``args`` and ``kwargs``
        # as-is would hand the callee one tuple and one dict as two
        # positional parameters instead of the caller's real arguments.
        return Success(function(*args, **kwargs))
    except Exception as e:
        logging.error(traceback.format_exc())
        return Failure(e)
|
||||
|
||||
|
||||
async def log_errors_async(awaitable: Awaitable[T]) -> Success[T] | Failure[Exception]:
    """Await ``awaitable`` and capture its outcome in a result container.

    Only ``Exception`` subclasses are intercepted; anything deriving solely
    from ``BaseException`` propagates to the caller.

    Args:
        awaitable: The awaitable to run to completion.

    Returns:
        ``Success`` wrapping the awaited value, or ``Failure`` wrapping the
        raised exception after its traceback has been logged at ERROR level.
    """
    try:
        value = await awaitable
        return Success(value)
    except Exception as error:
        # Record the full traceback before handing the error back as a value.
        trace = traceback.format_exc()
        logging.error(trace)
        return Failure(error)
|
||||
Loading…
Add table
Add a link
Reference in a new issue