diff --git a/modules/options.nix b/modules/options.nix index 71be81c..8af2ab5 100644 --- a/modules/options.nix +++ b/modules/options.nix @@ -80,7 +80,10 @@ in Specify `default` as the slug to use this key for check types that don't have a key explicitly assigned to them. If you are unsure of the exact slug a check will generate, it is recommended to try it out with the default key first, before - assigning a specific one.''; + assigning a specific one. + + **Note**: checks will be auto-provisioned, but correct intervals and grace periods have to be configured manually from the web console, + otherwise silent failures will not be recorded until after 1 day (the default healthchecks interval).''; }; }; }; diff --git a/src/lego_monitoring/__init__.py b/src/lego_monitoring/__init__.py index 8da27fe..488bf1a 100644 --- a/src/lego_monitoring/__init__.py +++ b/src/lego_monitoring/__init__.py @@ -96,17 +96,17 @@ async def async_main(): command_manager = CommandHandlerManager(checkers) await command_manager.attach_handlers(tg_client) - cvars.tg_client.set(tg_client) else: logging.info("Telegram integration is disabled") tg_client = None + cvars.tg_client.set(tg_client) + if config.alert_channels.healthchecks is not None: healthchecks_client = sender.get_healthchecks_client() logging.info("Ready to send pings to healthchecks") cvars.healthchecks_client.set(healthchecks_client) else: - healthchecks_client = None logging.info("Healthchecks integration is disabled") signal.signal(signal.SIGTERM, stop_gracefully) diff --git a/src/lego_monitoring/alerting/commands.py b/src/lego_monitoring/alerting/commands.py index 96491a0..f87f562 100644 --- a/src/lego_monitoring/alerting/commands.py +++ b/src/lego_monitoring/alerting/commands.py @@ -56,7 +56,7 @@ class CommandHandlerManager: if not isinstance(c, BaseChecker) or not c.persistent: continue for a in c.current_alerts: - if a.alert_type not in alert_num_by_state_with_max_type: + if a.alert_type not in 
alert_num_by_state_with_max_type and a.severity != Severity.OK: alert_num_by_state_with_max_type[a.alert_type] = [a.severity, 1] else: existing_list = alert_num_by_state_with_max_type[a.alert_type] @@ -80,6 +80,8 @@ class CommandHandlerManager: if not isinstance(c, BaseChecker) or not c.persistent: continue for a in c.current_alerts: + if a.severity == Severity.OK: + continue message = format_message(a, note="ongoing") messages.add(message) if len(messages) == 0: diff --git a/src/lego_monitoring/alerting/enum.py b/src/lego_monitoring/alerting/enum.py index d014145..b79abff 100644 --- a/src/lego_monitoring/alerting/enum.py +++ b/src/lego_monitoring/alerting/enum.py @@ -22,7 +22,7 @@ class AlertType(StrEnum): class Severity(IntEnum): - OK = 0 # should only be used when persistent alerts resolve + OK = 0 INFO = 1 WARNING = 2 CRITICAL = 3 diff --git a/src/lego_monitoring/alerting/sender.py b/src/lego_monitoring/alerting/sender.py index 534fcf3..b9508f9 100644 --- a/src/lego_monitoring/alerting/sender.py +++ b/src/lego_monitoring/alerting/sender.py @@ -38,7 +38,7 @@ def format_message(alert: Alert, note: str) -> str: async def send_alert(alert: Alert, note: str = "") -> None: try: - client = cvars.tg_client.get() + tg_client = cvars.tg_client.get() except LookupError: # being called standalone # cvars.config.set(get_config()) # temp_client = True @@ -47,10 +47,10 @@ async def send_alert(alert: Alert, note: str = "") -> None: raise NotImplementedError # TODO else: ... 
# temp_client = False - if client is not None: + if tg_client is not None: room_id = cvars.config.get().alert_channels.telegram.room_id message = format_message(alert, note) - await client.send_message(entity=room_id, message=message) + await tg_client.send_message(entity=room_id, message=message) # if temp_client: # await client.close() diff --git a/src/lego_monitoring/checks/temp/__init__.py b/src/lego_monitoring/checks/temp/__init__.py index 6322a72..4da183a 100644 --- a/src/lego_monitoring/checks/temp/__init__.py +++ b/src/lego_monitoring/checks/temp/__init__.py @@ -26,4 +26,6 @@ def temp_check() -> list[Alert]: else: continue alert_list.append(alert) + if len(alert_list) == 0: + alert_list.append(Alert(alert_type=AlertType.TEMP, message="All sensors nominal", severity=Severity.OK)) return alert_list diff --git a/src/lego_monitoring/core/checkers.py b/src/lego_monitoring/core/checkers.py index 5000ee7..ccb6c41 100644 --- a/src/lego_monitoring/core/checkers.py +++ b/src/lego_monitoring/core/checkers.py @@ -24,7 +24,7 @@ class BaseChecker: """ False: this persistent checker only emits messages when its max alert severity is changed - True: this persistent checker emits messages every times it checks + True: this persistent checker emits messages every time it checks and any non-OK alerts are present Has no effect if persistent == False """ @@ -65,17 +65,15 @@ class BaseChecker: async def _handle_alerts(self, alerts: list[Alert]) -> None: if not self.persistent: for alert in alerts: - await send_alert(alert, "ongoing" if self.is_reminder else "") + if alert.severity != Severity.OK: + await send_alert(alert, "ongoing" if self.is_reminder else "") return - old_types = self.current_alerts.get_types() old_severity, new_severity = self.current_alerts.update(alerts) - new_types = self.current_alerts.get_types() - if old_severity != new_severity or self.send_any_state: + if (old_severity != new_severity or self.send_any_state) and not ( + old_severity == None and 
new_severity == Severity.OK + ): for alert in alerts: await send_alert(alert, note="ongoing") - for alert_type in old_types - new_types: - alert = Alert(alert_type=alert_type, message="Situation resolved", severity=Severity.OK) - await send_alert(alert) async def run_checker(self) -> None: raise NotImplementedError diff --git a/src/lego_monitoring/core/cvars.py b/src/lego_monitoring/core/cvars.py index 65dc6c0..1abaefb 100644 --- a/src/lego_monitoring/core/cvars.py +++ b/src/lego_monitoring/core/cvars.py @@ -9,6 +9,6 @@ from lego_monitoring.alerting.current import CurrentAlerts from ..config import Config config: ContextVar[Config] = ContextVar("config") -tg_client: ContextVar[Optional[TelegramClient]] = ContextVar("tg_client", default=None) +tg_client: ContextVar[Optional[TelegramClient]] = ContextVar("tg_client") healthchecks_client: ContextVar[Optional[HealthchecksClient]] = ContextVar("healthchecks_client", default=None) current_alerts: ContextVar[list[CurrentAlerts]] = ContextVar("current_alerts", default=[])