mirror of
https://forgejo.altau.su/lego/lego-monitoring.git
synced 2026-03-10 04:41:10 +00:00
Merge branch 'ups-monitoring' into 'main'
UPS monitoring See merge request lego/lego-monitoring!3
This commit is contained in:
commit
1e1ebbb809
5 changed files with 172 additions and 5 deletions
|
|
@ -19,6 +19,7 @@ class AlertType(StrEnum):
|
|||
LOGIN = "LOGIN" # TODO
|
||||
SMART = "SMART" # TODO
|
||||
RAID = "RAID" # TODO
|
||||
UPS = "UPS"
|
||||
|
||||
|
||||
class Severity(Enum):
|
||||
|
|
|
|||
101
misc/checks.py
101
misc/checks.py
|
|
@ -1,5 +1,8 @@
|
|||
from datetime import timedelta
|
||||
|
||||
from alerting import alerts
|
||||
from misc import sensors, vuln
|
||||
from misc.enums import UPSStatus
|
||||
|
||||
IS_TESTING = False
|
||||
|
||||
|
|
@ -92,3 +95,101 @@ async def vuln_check() -> list[alerts.Alert]:
|
|||
)
|
||||
alert_list.append(alert)
|
||||
return alert_list
|
||||
|
||||
|
||||
async def ups_check() -> list[alerts.Alert]:
    """Build alerts for the current UPS battery level and UPS status flags.

    Reads one UPS snapshot via ``sensors.Sensors.get_ups()`` and produces:

    * at most one battery-level alert (critical takes precedence over warning);
    * one alert per reported status flag that has an entry in the status table.

    When ``IS_TESTING`` is set, every alert this check can produce is emitted
    exactly once, regardless of the actual readings.

    Returns:
        The alerts to send; an empty list when no UPS reading is available.
    """
    sensor = await sensors.Sensors.get_ups()

    # Always hand back a list, as the annotation promises.
    if not sensor:
        return []

    alert_list = []

    charge = sensor.battery_charge_percentage
    remaining = f"{charge}% ({timedelta(seconds=sensor.battery_runtime)}) remaining."

    # Most severe threshold first: outside of testing only the first match is reported.
    battery_thresholds = (
        (sensor.battery_critical_percentage, alerts.Severity.CRITICAL),
        (sensor.battery_warning_percentage, alerts.Severity.WARNING),
    )
    for limit, severity in battery_thresholds:
        # The charge stays None when no charge reading was parsed; skip the comparison then.
        if IS_TESTING or (charge is not None and charge < limit):
            alert_list.append(
                alerts.Alert(
                    alert_type=alerts.AlertType.UPS,
                    message=f"Battery is under {limit}%\n{remaining}",
                    severity=severity,
                )
            )
            if not IS_TESTING:
                break

    # Status flag -> (message, severity). Flags without an entry produce no alert.
    status_alerts = {
        UPSStatus.BATTERY_REPLACE: ("UPS battery needs to be replaced ASAP!", alerts.Severity.CRITICAL),
        UPSStatus.UPS_OVERLOAD: ("UPS is overloaded!", alerts.Severity.CRITICAL),
        UPSStatus.UPS_BYPASS: (
            "BYPASS MODE: Battery and connected devices are not protected from power outage!",
            alerts.Severity.WARNING,
        ),
        UPSStatus.UPS_CALIBRATION: ("UPS is currently performing runtime calibration.", alerts.Severity.INFO),
        UPSStatus.ON_BATTERY: (f"UPS is on battery.\n{remaining}", alerts.Severity.INFO),
        UPSStatus.UPS_OFFLINE: ("UPS seems to be offline.", alerts.Severity.INFO),
        UPSStatus.UPS_TRIM: ("Overvoltage detected: trimming voltage to nominal.", alerts.Severity.INFO),
        UPSStatus.UPS_BOOST: ("Undervoltage detected: boosting voltage to nominal.", alerts.Severity.INFO),
        UPSStatus.UPS_FSD: ("Shutdown imminent!", alerts.Severity.CRITICAL),
    }

    # In testing mode walk the whole table; otherwise only the reported flags.
    # The status list stays None when no status line was parsed.
    statuses = status_alerts if IS_TESTING else (sensor.ups_status or ())
    for status in statuses:
        entry = status_alerts.get(status)
        if entry is None:
            continue
        message, severity = entry
        alert_list.append(
            alerts.Alert(
                alert_type=alerts.AlertType.UPS,
                message=message,
                severity=severity,
            )
        )

    return alert_list
|
||||
|
|
|
|||
21
misc/enums.py
Normal file
21
misc/enums.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
from enum import StrEnum
|
||||
|
||||
|
||||
class UPSStatus(StrEnum):
|
||||
"""https://networkupstools.org/docs/developer-guide.chunked/new-drivers.html#_status_data"""
|
||||
|
||||
ON_LINE = "OL"
|
||||
ON_BATTERY = "OB"
|
||||
BATTERY_LOW = "LB"
|
||||
BATTERY_HIGH = "HB"
|
||||
BATTERY_REPLACE = "RB"
|
||||
BATTERY_CHARGING = "CHRG"
|
||||
BATTERY_DISCHARGING = "DISCHRG"
|
||||
UPS_BYPASS = "BYPASS"
|
||||
"""Battery and connected devices are not protected from power outage!"""
|
||||
UPS_OFFLINE = "OFF"
|
||||
UPS_OVERLOAD = "OVER"
|
||||
UPS_CALIBRATION = "CAL"
|
||||
UPS_TRIM = "TRIM"
|
||||
UPS_BOOST = "BOOST"
|
||||
UPS_FSD = "FSD"
|
||||
|
|
@ -1,7 +1,11 @@
|
|||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
|
||||
from psutil import cpu_percent, sensors_temperatures, virtual_memory
|
||||
|
||||
from .enums import UPSStatus
|
||||
from alerting import alerts
|
||||
|
||||
|
||||
@dataclass
|
||||
class TemperatureSensor:
|
||||
|
|
@ -27,6 +31,15 @@ class RamSensor:
|
|||
critical_avail: int = 2 * 1024**3
|
||||
|
||||
|
||||
@dataclass
class UPSSensor:
    """Snapshot of UPS readings together with the alerting thresholds."""

    # Status flags reported by the UPS; None until a status reading has been stored.
    ups_status: list[UPSStatus] | None = None
    # Current battery charge in percent; None until a charge reading has been stored.
    battery_charge_percentage: int | None = None
    # Charge (percent) below which a warning is warranted.
    battery_warning_percentage: int = 20
    # Charge (percent) below which the situation is critical.
    battery_critical_percentage: int = 10
    # Remaining battery runtime; presumably seconds - confirm against the data source.
    battery_runtime: int = 1000
|
||||
|
||||
|
||||
class Sensors:
|
||||
@staticmethod
|
||||
def get_temperatures() -> dict[str, list[TemperatureSensor]]:
|
||||
|
|
@ -96,10 +109,40 @@ class Sensors:
|
|||
@staticmethod
def get_ram() -> RamSensor:
    """Read the current memory statistics and wrap them in a ``RamSensor``."""
    memory = virtual_memory()
    available_bytes = memory.available
    # NOTE(review): psutil documents ``percent`` as memory *usage*, while the
    # field it feeds is named "avail" - confirm which one consumers expect.
    percent = memory.percent
    return RamSensor(current_avail=available_bytes, current_avail_percentage=percent)
|
||||
|
||||
@staticmethod
|
||||
async def get_ups() -> None | UPSSensor:
|
||||
try:
|
||||
raw_data = subprocess.run(["upsc", "cp1300"], stdout=subprocess.PIPE, encoding="utf-8")
|
||||
except FileNotFoundError:
|
||||
await alerts.send_alert(alerts.Alert(
|
||||
alert_type=alerts.AlertType.ERROR,
|
||||
message="upsc is not installed!",
|
||||
severity=alerts.Severity.CRITICAL
|
||||
))
|
||||
return None
|
||||
|
||||
if __name__ == "__main__":
|
||||
for i in Sensors.get_temperatures():
|
||||
print(i)
|
||||
sensor_data = UPSSensor()
|
||||
|
||||
for line in raw_data.stdout.splitlines():
|
||||
sensor, value = line.split(": ")[:2]
|
||||
match sensor:
|
||||
case "battery.charge":
|
||||
sensor_data.battery_charge_percentage = int(value)
|
||||
case "battery.charge.low":
|
||||
# ? in case we need to evaluate critical% from sensor
|
||||
# sensor_data.battery_critical_percentage = int(value)
|
||||
sensor_data.battery_critical_percentage = 25
|
||||
case "battery.charge.warning":
|
||||
# ? in case we need to evaluate warning% from sensor
|
||||
# sensor_data.battery_warning_percentage = int(value)
|
||||
sensor_data.battery_warning_percentage = 50
|
||||
case "battery.runtime":
|
||||
sensor_data.battery_runtime = int(value)
|
||||
case "ups.status":
|
||||
sensor_data.ups_status = [UPSStatus(status) for status in value.split()]
|
||||
case _:
|
||||
...
|
||||
|
||||
return sensor_data
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ async def main():
|
|||
checkers = (
|
||||
checker(checks.temp_check, 5 * MINUTE, client),
|
||||
checker(checks.cpu_check, 5 * MINUTE, client),
|
||||
checker(checks.ups_check, 5 * MINUTE, client),
|
||||
checker(checks.ram_check, 1 * MINUTE, client),
|
||||
checker(checks.vuln_check, 1 * DAY, client),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue