account for alert sending failing

This commit is contained in:
Alex Tau 2025-12-19 16:34:10 +03:00
parent 40e30529eb
commit 10e79d6827
6 changed files with 84 additions and 32 deletions

View file

@ -142,8 +142,9 @@ async def async_main():
if stopping:
if "self" in config.enabled_check_sets:
alert = checks.generate_stop_alert()
await sender.send_alert(alert)
await sender.send_healthchecks_status(alert)
async with asyncio.TaskGroup() as tg:
tg.create_task(sender.send_alert(alert))
tg.create_task(sender.send_healthchecks_status(alert))
for c in checkers:
try:
await c.graceful_stop()

View file

@ -1,12 +1,13 @@
import logging
from socket import gethostname
import tenacity
from returns.result import Failure, Success
from telethon import TelegramClient
from telethon.sessions import MemorySession
from uplink import AiohttpClient
from ..checks.utils import format_for_healthchecks_slug
from ..core import cvars
from ..core.error_handling import log_errors_async
from .alert import Alert
from .clients.healthchecks import HealthchecksClient
from .enum import SEVERITY_TO_EMOJI, Severity
@ -38,27 +39,26 @@ def format_message(alert: Alert, note: str) -> str:
return message
async def send_alert(alert: Alert, note: str = "") -> None:
async def send_alert(alert: Alert, note: str = "") -> Success[None] | Failure[tenacity.RetryError]:
    """Send ``alert`` via ``_send_alert`` and report the outcome as a result.

    The actual send is wrapped in ``log_errors_async``, so the outcome is
    handed back as a result container rather than raised to the caller.

    Args:
        alert: The alert to deliver.
        note: Optional extra text forwarded to ``_send_alert``.

    Returns:
        The result container produced by ``log_errors_async``.
    """
    # Hand the wrapped result back to the caller: without ``return`` this
    # coroutine always resolved to ``None`` despite its declared return type.
    return await log_errors_async(_send_alert(alert, note))
async def send_healthchecks_status(alert: Alert) -> Success[None] | Failure[tenacity.RetryError]:
    """Report ``alert`` via ``_send_healthchecks_status`` and return the outcome.

    The actual send is wrapped in ``log_errors_async``, so the outcome is
    handed back as a result container rather than raised to the caller.

    Args:
        alert: The alert whose status should be reported.

    Returns:
        The result container produced by ``log_errors_async``.
    """
    # Hand the wrapped result back to the caller: without ``return`` this
    # coroutine always resolved to ``None`` despite its declared return type.
    return await log_errors_async(_send_healthchecks_status(alert))
@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60))
async def _send_alert(alert: Alert, note: str = "") -> None:
    """Post ``alert`` to the Telegram room named in the configuration.

    The ``tenacity.retry`` decorator re-runs this coroutine when it raises,
    waiting a randomized, exponentially growing interval between attempts.
    NOTE(review): no ``stop`` condition is passed to ``tenacity.retry``, so
    retries look unbounded - confirm that is intended.

    Args:
        alert: The alert to format and send.
        note: Extra text passed through to ``format_message``.
    """
    logging.debug(f"Sending {alert.alert_type} alert to Telegram")

    client = cvars.tg_client.get()
    if client is None:
        # No Telegram client is set in the context: nothing to send.
        return

    destination = cvars.config.get().alert_channels.telegram.room_id
    await client.send_message(entity=destination, message=format_message(alert, note))
async def send_healthchecks_status(alert: Alert) -> None:
@tenacity.retry(wait=tenacity.wait_random_exponential(multiplier=1, max=60))
async def _send_healthchecks_status(alert: Alert) -> None:
def get_pinging_key(keys: dict[str, str]):
if alert.healthchecks_slug in keys:
return keys[alert.healthchecks_slug]

View file

@ -70,21 +70,22 @@ class BaseChecker:
return result
async def _handle_alerts(self, alerts: list[Alert]) -> None:
    """Schedule healthchecks pings and alert messages for ``alerts``.

    Every send is created as a task on a single ``asyncio.TaskGroup``, so the
    sends run concurrently and this method returns only after all of the
    scheduled tasks have finished.

    Args:
        alerts: Alerts produced by the current check run.
    """
    async with asyncio.TaskGroup() as tg:
        # Healthchecks pings are skipped for reminder runs.
        if not self.is_reminder:
            for alert in alerts:
                tg.create_task(send_healthchecks_status(alert))

        if not self.persistent:
            # Non-persistent checkers: send every non-OK alert, marking it
            # "ongoing" only when this run is a reminder.
            for alert in alerts:
                if alert.severity != Severity.OK:
                    tg.create_task(send_alert(alert, "ongoing" if self.is_reminder else ""))
            # Leaving the ``async with`` block still waits for the tasks.
            return

        # Persistent checkers: send when the severity changed (or when
        # ``send_any_state`` is set), but not for a None -> OK transition.
        old_severity, new_severity = self.current_alerts.update(alerts)
        if (old_severity != new_severity or self.send_any_state) and not (
            old_severity is None and new_severity == Severity.OK
        ):
            for alert in alerts:
                tg.create_task(send_alert(alert, note="ongoing"))
async def run_checker(self) -> None:
raise NotImplementedError

View file

@ -0,0 +1,23 @@
import logging
import traceback
from typing import Awaitable, Callable, TypeVar
from returns.result import Failure, Success
T = TypeVar("T")
def log_errors(function: Callable[..., T], *args, **kwargs) -> Success[T] | Failure[Exception]:
    """Call ``function`` and capture any exception instead of raising it.

    Args:
        function: The callable to invoke.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        ``Success`` wrapping the return value of ``function``, or ``Failure``
        wrapping the raised exception after its traceback has been logged at
        ERROR level.
    """
    try:
        # Unpack the captured arguments. Passing ``args`` and ``kwargs``
        # as-is would call ``function`` with a tuple and a dict as two
        # positional arguments instead of the caller's real arguments.
        return Success(function(*args, **kwargs))
    except Exception as e:
        logging.error(traceback.format_exc())
        return Failure(e)
async def log_errors_async(awaitable: Awaitable[T]) -> Success[T] | Failure[Exception]:
    """Await ``awaitable`` and report its outcome as a result container.

    Args:
        awaitable: The awaitable to run.

    Returns:
        ``Success`` wrapping the awaited value, or ``Failure`` wrapping the
        raised exception after its traceback has been logged at ERROR level.
    """
    try:
        value = await awaitable
        outcome = Success(value)
    except Exception as error:
        # Log the full traceback, then hand the exception back as a value.
        logging.error(traceback.format_exc())
        outcome = Failure(error)
    return outcome