move existing stuff to archive dir (for now)

This commit is contained in:
Alex Tau 2025-04-27 20:39:07 +03:00
parent ae1204449c
commit 4fc491f61a
32 changed files with 0 additions and 0 deletions

View file

@ -0,0 +1,53 @@
import asyncio
import datetime
import logging
from typing import Callable, Coroutine
from alerting import alerts
async def _call_check(check: Callable | Coroutine, *args, **kwargs) -> list[alerts.Alert]:
    """Invoke *check* and return its alert list, awaiting where necessary.

    Accepts a plain function, an async function, or an already-created
    coroutine object; raises TypeError for anything else.
    """
    if isinstance(check, Coroutine):
        # An already-instantiated coroutine: just await it.
        return await check
    if not isinstance(check, Callable):
        raise TypeError(f"check is {type(check)}, neither function nor coroutine")
    outcome = check(*args, **kwargs)
    if isinstance(outcome, Coroutine):
        # The callable was an async function -- await the coroutine it produced.
        outcome = await outcome
    return outcome
async def interval_checker(check: Callable | Coroutine, interval: datetime.timedelta, *args, **kwargs):
    """Run *check* forever, sleeping *interval* between invocations.

    Each alert the check returns is sent immediately via alerts.send_alert.
    Extra positional/keyword arguments are forwarded to the check.
    """
    interval_secs = interval.total_seconds()
    while True:
        logging.info(f"Calling {check.__name__}")
        result = await _call_check(check, *args, **kwargs)
        logging.info(f"Got {len(result)} alerts")
        for alert in result:
            await alerts.send_alert(alert)
        # Sleep is measured from the end of the run, not the start, so the
        # effective period is interval + check duration.
        await asyncio.sleep(interval_secs)
async def scheduled_checker(
    check: Callable | Coroutine, period: datetime.timedelta, when: datetime.time, *args, **kwargs
):
    """Run *check* forever at wall-clock time *when*, once per *period*.

    Only a daily period is implemented; any other period raises
    NotImplementedError.
    """
    match period:
        # Class pattern: matches any timedelta whose .days attribute == 1
        # (i.e. intended to match timedelta(days=1)).
        case datetime.timedelta(days=1):
            while True:
                now = datetime.datetime.now()
                # Next occurrence of *when*: today, or tomorrow if already past.
                next_datetime = datetime.datetime.combine(datetime.date.today(), when)
                if next_datetime < now:
                    next_datetime += datetime.timedelta(days=1)
                logging.info(f"Scheduled to call {check.__name__} at {next_datetime.isoformat()}")
                await asyncio.sleep(
                    (next_datetime - now).total_seconds()
                )  # might be negative at this point, asyncio doesn't care
                logging.info(f"Calling {check.__name__}")
                result = await _call_check(check, *args, **kwargs)
                logging.info(f"Got {len(result)} alerts")
                for alert in result:
                    await alerts.send_alert(alert)
        case _:
            raise NotImplementedError

276
archive-arch/misc/checks.py Normal file
View file

@ -0,0 +1,276 @@
import logging
import traceback
from datetime import timedelta
from alerting import alerts
from alerting.enum import AlertType, Severity
from misc import cvars, docker_registry, sensors, vuln
from misc.disks import LVAttr, WearoutIndicator, get_wearout_reading
# When True, every check below short-circuits its threshold comparisons and
# emits its alert(s) unconditionally -- used for manually exercising alerting.
IS_TESTING = False
def temp_check() -> list[alerts.Alert]:
    """Check every temperature sensor and alert on high/critical readings."""
    found = []
    for sensor_list in sensors.Sensors.get_temperatures().values():
        for sensor in sensor_list:
            if sensor.sensor_type == "nct6687":
                continue  # little valuable info and too low limits there, might as well ignore it
            over_critical = sensor.critical_temp is not None and (
                IS_TESTING or sensor.current_temp > sensor.critical_temp
            )
            over_highest = sensor.highest_temp is not None and (
                IS_TESTING or sensor.current_temp > sensor.highest_temp
            )
            if over_critical:
                found.append(
                    alerts.Alert(
                        alert_type=AlertType("TEMP"),
                        message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.critical_temp}°C",
                        severity=Severity.CRITICAL,
                    )
                )
            elif over_highest:
                found.append(
                    alerts.Alert(
                        alert_type=AlertType("TEMP"),
                        message=f"{sensor.sensor_type} {sensor.sensor_label}: {sensor.current_temp}°C > {sensor.highest_temp}°C",
                        severity=Severity.WARNING,
                    )
                )
    return found
def cpu_check() -> list[alerts.Alert]:
    """Alert when current CPU load exceeds its warning or critical threshold."""
    sensor = sensors.Sensors.get_cpu()
    if IS_TESTING or sensor.current_load > sensor.critical_load:
        severity = Severity.CRITICAL
        message = f"{sensor.current_load}% > {sensor.critical_load}%"
    elif IS_TESTING or sensor.current_load > sensor.highest_load:
        severity = Severity.WARNING
        message = f"{sensor.current_load}% > {sensor.highest_load}%"
    else:
        return []
    return [alerts.Alert(alert_type=AlertType("CPU"), message=message, severity=severity)]
def ram_check() -> list[alerts.Alert]:
    """Alert when available RAM drops below its warning or critical threshold."""
    sensor = sensors.Sensors.get_ram()
    if IS_TESTING or sensor.current_avail < sensor.critical_avail:
        severity = Severity.CRITICAL
        limit = sensor.critical_avail
    elif IS_TESTING or sensor.current_avail < sensor.warning_avail:
        severity = Severity.WARNING
        limit = sensor.warning_avail
    else:
        return []
    message = f"{(sensor.current_avail / 1024**3):.2f} GiB < {(limit / 1024**3):.2f} GiB"
    return [alerts.Alert(alert_type=AlertType("RAM"), message=message, severity=severity)]
async def vuln_check() -> list[alerts.Alert]:
vulns = await vuln.get_vulns()
alert_list = []
for v in vulns:
if IS_TESTING or v.fixed or v.severity in (vuln.Severity.HIGH, vuln.Severity.CRITICAL):
match v.severity:
case vuln.Severity.LOW:
severity = Severity.INFO
case vuln.Severity.MEDIUM:
severity = Severity.WARNING
case vuln.Severity.HIGH | vuln.Severity.CRITICAL:
severity = Severity.CRITICAL
message = f"{v.id}: {v.vuln_type} in {','.join(v.packages)}"
html_message = f"<a href='{v.link}'>{v.id}</a>: {v.vuln_type} in {','.join(v.packages)}"
if v.fixed:
message.append(f" -- update to {v.fixed} ASAP")
html_message.append(f" -- update to {v.fixed} ASAP")
alert = alerts.Alert(
alert_type=AlertType.VULN,
message=message,
html_message=html_message,
severity=severity,
)
alert_list.append(alert)
return alert_list
async def ups_check() -> list[alerts.Alert]:
    """Check UPS battery charge and status flags.

    Returns an empty list when no UPS sensor data is available
    (e.g. upsc is not installed).
    """
    sensor = await sensors.Sensors.get_ups()
    if not sensor:
        # Bug fix: was a bare `return` (None), which breaks callers that
        # do len()/iteration on the declared list[alerts.Alert] result.
        return []
    alert_list = []
    # Battery charge thresholds.
    if IS_TESTING or sensor.battery_charge_percentage < sensor.battery_critical_percentage:
        alert_list.append(
            alerts.Alert(
                alert_type=AlertType.UPS,
                message=f"Battery is under {sensor.battery_critical_percentage}%\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.",
                severity=Severity.CRITICAL,
            )
        )
    elif IS_TESTING or sensor.battery_charge_percentage < sensor.battery_warning_percentage:
        alert_list.append(
            alerts.Alert(
                alert_type=AlertType.UPS,
                message=f"Battery is under {sensor.battery_warning_percentage}%\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.",
                severity=Severity.WARNING,
            )
        )
    # Status flags reported by the UPS. ups_status defaults to None when
    # upsc reported no "ups.status" line, so guard the iteration.
    for status in sensor.ups_status or []:
        if IS_TESTING or status == sensors.UPSStatus.UPS_OVERLOAD:
            alert_list.append(
                alerts.Alert(alert_type=AlertType.UPS, message="UPS is overloaded!", severity=Severity.CRITICAL)
            )
        elif IS_TESTING or status == sensors.UPSStatus.ON_BATTERY:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.UPS,
                    message=f"UPS is on battery.\n{sensor.battery_charge_percentage}% ({timedelta(seconds=sensor.battery_runtime)}) remaining.",
                    severity=Severity.INFO,
                )
            )
        elif IS_TESTING or status == sensors.UPSStatus.UPS_TRIM:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.UPS,
                    message="Overvoltage detected: trimming voltage to nominal.",
                    severity=Severity.INFO,
                )
            )
        elif IS_TESTING or status == sensors.UPSStatus.UPS_BOOST:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.UPS,
                    message="Undervoltage detected: boosting voltage to nominal.",
                    severity=Severity.INFO,
                )
            )
    return alert_list
async def docker_registry_check() -> list[alerts.Alert]:
    """Emit an INFO alert for every watched docker image with a new version."""
    updated_images = await docker_registry.get_updated_images()
    return [
        alerts.Alert(
            alert_type=AlertType.UPDATE,
            message=f"{image} docker image: new version available",
            severity=Severity.INFO,
        )
        for image in updated_images
    ]
def raid_check() -> list[alerts.Alert]:
    """Check the health of every configured RAID LV and alert on problems."""
    check_config = cvars.config.get().checks.raid
    alert_list = []
    # Health states worth alerting on, mapped to (message template, severity).
    problem_map = {
        LVAttr.Health.PARTIAL: (
            "LV {lv} operating in partial mode; one of PVs has failed",
            Severity.CRITICAL,
        ),
        LVAttr.Health.UNKNOWN: ("LV {lv}'s state is unknown", Severity.CRITICAL),
        LVAttr.Health.REFRESH_NEEDED: (
            "LV {lv} has suffered a write error; run a refresh or replace the failing PV",
            Severity.WARNING,
        ),
        LVAttr.Health.MISMATCHES: (
            "LV {lv} is partially incoherent; run a repairing scrub operation",
            Severity.WARNING,
        ),
    }
    for lv in check_config.lvs:
        try:
            lv_attr = LVAttr.from_cli(lv)
        except Exception as exc:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.ERROR,
                    message=f"Could not check RAID LV {lv}: {repr(exc)}, see logs",
                    severity=Severity.CRITICAL,
                )
            )
            logging.error(traceback.format_exc())
            continue
        # sanity check
        if lv_attr.vol_type not in [LVAttr.VolType.RAID, LVAttr.VolType.RAID_NOSYNC]:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.ERROR,
                    message=f"LV {lv} is not of RAID type",
                    severity=Severity.CRITICAL,
                )
            )
            continue
        if IS_TESTING:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.RAID,
                    message=f"Test alert: LV {lv} health is {lv_attr.health}",
                    severity=Severity.INFO,
                )
            )
        if lv_attr.health in problem_map:
            template, severity = problem_map[lv_attr.health]
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.RAID,
                    message=template.format(lv=lv),
                    severity=severity,
                )
            )
    return alert_list
def disk_wearout_check() -> list[alerts.Alert]:
    """Alert when a configured disk's SMART wearout reading falls below threshold."""
    check_config = cvars.config.get().checks.wearout
    alert_list = []
    for disk in check_config.disks:
        try:
            wearout_reading = get_wearout_reading(disk.name)
        except Exception as exc:
            alert_list.append(
                alerts.Alert(
                    alert_type=AlertType.ERROR,
                    message=f"Could not check wearout for disk {disk.name}: {repr(exc)}, see logs",
                    severity=Severity.CRITICAL,
                )
            )
            logging.error(traceback.format_exc())
            continue
        if not (IS_TESTING or wearout_reading.current_reading < wearout_reading.threshold_reading):
            continue
        if wearout_reading.indicator == WearoutIndicator.REALLOCATED_SECTORS:
            message = f"Disk {disk.name} has reallocated sectors (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
        else:  # WearoutIndicator.SPARE_BLOCKS (only other member)
            message = f"Disk {disk.name} has too few spare blocks (curr {wearout_reading.current_reading}, thresh {wearout_reading.threshold_reading})"
        alert_list.append(
            alerts.Alert(alert_type=AlertType.DISKS, message=message, severity=Severity[disk.severity])
        )
    return alert_list

View file

@ -0,0 +1,6 @@
import os
import tempfile
from pathlib import Path
# Absolute path to config.json, one directory above this module.
CONFIG_FILE = (Path(os.path.dirname(os.path.realpath(__file__))) / ".." / "config.json").resolve()
# Scratch directory for this application inside the system temp dir.
TMP_DIR = Path(tempfile.gettempdir()) / "lego-monitoring"

View file

@ -0,0 +1,65 @@
import json
from dataclasses import dataclass
from alt_utils import NestedDeserializableDataclass
from alerting.enum import Severity
from misc.common import CONFIG_FILE
@dataclass
class MatrixConfig:
    """Matrix account credentials and target room for alert delivery."""

    homeserver: str
    user_id: str
    device_id: str
    access_token: str
    room_id: str
@dataclass
class CheckDockerRegistryConfig:
    """Settings for the docker-registry update check."""

    hub_url: str  # Docker Hub API base URL
    images: list[str]  # "namespace/repo" or "registry/namespace/repo" specs
@dataclass
class CheckRaidConfig:
    """Settings for the LVM RAID health check."""

    lvs: list[str]  # LV names passed to the `lvs` CLI
@dataclass
class CheckWearoutDiskConfig:
    """One disk to monitor for SMART wearout."""

    name: str  # device path handed to smartctl
    severity: Severity  # severity used for this disk's wearout alert
@dataclass
class CheckWearoutConfig(NestedDeserializableDataclass):
    """Settings for the disk wearout check."""

    disks: list[CheckWearoutDiskConfig]
@dataclass
class CheckLoginConfig:
    """Settings for the login check."""

    hostname: str
@dataclass
class ChecksConfig(NestedDeserializableDataclass):
    """Per-check configuration sections."""

    docker_registry: CheckDockerRegistryConfig
    raid: CheckRaidConfig
    wearout: CheckWearoutConfig
    login: CheckLoginConfig
@dataclass
class Config(NestedDeserializableDataclass):
    """Top-level application configuration loaded from config.json."""

    matrix: MatrixConfig
    checks: ChecksConfig
    disabled_checks: list[str]
def get_config() -> Config:
    """Read CONFIG_FILE and deserialize it into a Config instance."""
    with open(CONFIG_FILE) as config_file:
        return Config.from_dict(json.load(config_file))

View file

@ -0,0 +1,8 @@
from contextvars import ContextVar
import nio
from misc.config import Config
# ContextVar holding the deserialized application Config (see misc.config).
config: ContextVar[Config] = ContextVar("config")
# ContextVar holding the shared matrix-nio async client.
matrix_client: ContextVar[nio.AsyncClient] = ContextVar("matrix_client")

191
archive-arch/misc/disks.py Normal file
View file

@ -0,0 +1,191 @@
import json
import subprocess
from dataclasses import dataclass
from enum import Enum, StrEnum
from typing import Optional, Self
@dataclass
class LVAttr:
"""https://man.archlinux.org/man/lvs.8#NOTES"""
class VolType(StrEnum):
CACHE = "C"
MIRRORED = "m"
MIRRORED_NOSYNC = "M"
ORIGIN = "o"
ORIGIN_MERGING_SNAPSHOT = "O"
INTEGRITY = "g"
RAID = "r"
RAID_NOSYNC = "R"
SNAPSHOT = "s"
MERGING_SNAPSHOT = "S"
PVMOVE = "p"
VIRTUAL = "v"
IMAGE = "i"
IMAGE_OUT_OF_SYNC = "I"
MIRROR_LOG = "l"
CONVERTING = "c"
THIN = "V"
THIN_POOL = "t"
THIN_POOL_DATA = "T"
VDO_POOL = "d"
VDO_POOL_DATA = "D"
METADATA = "e"
NORMAL = "-"
class Permissions(StrEnum):
WRITABLE = "w"
READONLY = "r"
READONLY_ACTIVATED = "R"
class AllocationPolicy(StrEnum):
ANYWHERE = "a"
ANYWHERE_LOCKED = "A"
CONTIGUOUS = "c"
CONTIGUOUS_LOCKED = "C"
INHERITED = "i"
INHERITED_LOCKED = "I"
CLING = "l"
CLING_LOCKED = "L"
NORMAL = "n"
NORMAL_LOCKED = "N"
class State(StrEnum):
ACTIVE = "a"
HISTORICAL = "h"
SUSPENDED = "s"
INVALID_SNAPSHOT = "I"
INVALID_SUSPENDED_SNAPSHOT = "S"
SNAPSHOT_MERGE_FAILED = "m"
SUSPENDED_SNAPSHOT_MERGE_FAILED = "M"
DEVICE_PRESENT_NO_TABLES = "d"
DEVICE_PRESENT_INACTIVE_TABLE = "i"
THIN_POOL_CHECK_NEEDED = "c"
SUSPENDED_THIN_POOL_CHECK_NEEDED = "C"
UNKNOWN = "X"
class IsOpen(StrEnum):
OPEN = "o"
CLOSED = "-"
UNKNOWN = "X"
class TargetType(StrEnum):
CACHE = "C"
MIRROR = "m"
RAID = "r"
SNAPSHOT = "s"
THIN = "t"
UNKNOWN = "u"
VIRTUAL = "v"
NORMAL = "-"
class Health(StrEnum):
# for all
PARTIAL = "p"
UNKNOWN = "X"
OK = "-"
# for RAID
REFRESH_NEEDED = "r"
MISMATCHES = "m"
WRITEMOSTLY = "w"
RESHAPING = "s"
REMOVE = "R"
# for thin pools and LVs
FAILED = "F"
OUT_OF_SPACE = "D"
METADATA_READ_ONLY = "M"
# for writecache
ERROR = "E"
vol_type: VolType
permissions: Permissions
allocation_policy: AllocationPolicy
fixed_minor: bool
state: State
is_open: IsOpen
target_type: TargetType
zero_before_use: bool
health: Health
skip_activation: bool
name: Optional[str] = None
@classmethod
def from_str(cls, attr_str: str, name: Optional[str] = None) -> Self:
kwargs = {}
kwargs["vol_type"] = cls.VolType(attr_str[0])
kwargs["permissions"] = cls.Permissions(attr_str[1])
kwargs["allocation_policy"] = cls.AllocationPolicy(attr_str[2])
kwargs["fixed_minor"] = True if attr_str[3] == "m" else False
kwargs["state"] = cls.State(attr_str[4])
kwargs["is_open"] = cls.IsOpen(attr_str[5])
kwargs["target_type"] = cls.TargetType(attr_str[6])
kwargs["zero_before_use"] = True if attr_str[7] == "z" else False
kwargs["health"] = cls.Health(attr_str[8])
kwargs["skip_activation"] = True if attr_str[9] == "k" else False
kwargs["name"] = name
return cls(**kwargs)
@classmethod
def from_cli(cls, name: str) -> Self:
json_obj = json.loads(subprocess.run(["lvs", "--reportformat=json", name], capture_output=True).stdout)
attr_str = json_obj["report"][0]["lv"][0]["lv_attr"]
return cls.from_str(attr_str, name)
class WearoutIndicator(Enum):
    """Which SMART attribute a wearout reading is based on."""

    REALLOCATED_SECTORS = 0  # rotating media (HDD)
    SPARE_BLOCKS = 1  # non-rotating media (SSD/NVMe)
@dataclass
class WearoutReading:
    """A wearout indicator value paired with its vendor threshold."""

    indicator: WearoutIndicator
    current_reading: int  # current SMART attribute value
    threshold_reading: int  # vendor threshold; lower current value = worn out
def _get_wearout_reading_from_smartctl_output(smartctl_output: dict) -> WearoutReading:
disk_protocol = smartctl_output["device"]["protocol"]
rotation_rate = smartctl_output.get("rotation_rate", 0)
match rotation_rate:
case 0: # assuming non-rotating media is an SSD
indicator = WearoutIndicator.SPARE_BLOCKS
match disk_protocol:
case "ATA":
attr_table = smartctl_output["ata_smart_attributes"]["table"]
for a in attr_table:
if a["name"] == "Available_Reservd_Space":
value = a["value"]
threshold = a["thresh"]
break
else:
raise Exception(f"no Available_Reservd_Space on ATA SSD")
case "NVMe":
value = smartctl_output["nvme_smart_health_information_log"]["available_spare"]
threshold = smartctl_output["nvme_smart_health_information_log"]["available_spare_threshold"]
case _:
indicator = WearoutIndicator.REALLOCATED_SECTORS
match disk_protocol:
case "ATA":
attr_table = smartctl_output["ata_smart_attributes"]["table"]
for a in attr_table:
if a["name"] == "Reallocated_Sector_Ct":
value = a["value"]
threshold = a["thresh"]
break
else:
raise Exception(f"no Reallocated_Sector_Ct on ATA HDD")
case "NVMe": # ? NVMe HDDs are very rare, if they even exist
raise NotImplementedError
return WearoutReading(indicator, current_reading=value, threshold_reading=threshold)
def get_wearout_reading(disk: str) -> WearoutReading:
    """Run `smartctl -ja` on *disk* and parse out its wearout reading."""
    proc = subprocess.run(["smartctl", "-ja", disk], capture_output=True)
    parsed_output = json.loads(proc.stdout.decode("utf-8"))
    return _get_wearout_reading_from_smartctl_output(parsed_output)

View file

@ -0,0 +1,129 @@
import datetime
import ipaddress
import json
import logging
import re
import socket
import traceback
from typing import Optional
from urllib.parse import urlparse
import uplink
from alerting import alerts
from alerting.enum import AlertType, Severity
from misc import cvars
class DockerHubClient(uplink.Consumer):
    """Minimal Docker Hub API client: fetches metadata for the `latest` tag."""

    @uplink.returns.json
    @uplink.get("v2/namespaces/{namespace}/repositories/{repository}/tags/latest")
    def get_latest_tag(self, namespace: uplink.Path, repository: uplink.Path): ...
class DockerRegistryAuthorizer(uplink.Consumer):
    """Fetches bearer tokens from a registry's auth realm endpoint."""

    @uplink.returns.json
    @uplink.get()
    def _get_token_unprotected(self, service: uplink.Query, scope: uplink.Query): ...

    async def get_token(self, service: Optional[str], scope: Optional[str]) -> str:
        """Fetch a token, first refusing auth hosts that resolve to
        non-global addresses (guards against a registry redirecting the
        token request into a private network)."""
        host = urlparse(self.session.base_url).hostname
        ips = set()
        try:
            ips.add(ipaddress.ip_address(host))
        # Bug fix: was a bare `except:` -- ip_address raises ValueError when
        # *host* is a hostname rather than an IP literal; resolve it instead.
        except ValueError:
            addrinfo = socket.getaddrinfo(host, None)
            for t in addrinfo:
                ips.add(ipaddress.ip_address(t[4][0]))
        for ip in ips:
            if not ip.is_global:
                raise Exception(f"{host} resolved to {ip} which is not global")
        return (await self._get_token_unprotected(service, scope))["token"]
class DockerRegistryClient(uplink.Consumer):
    """Generic Docker registry v2 client for `latest`-tag update detection."""

    # Raw (non-JSON) variant used only to probe auth requirements.
    @uplink.get("v2/{namespace}/{repository}/manifests/latest")
    def _test_manifest(self, namespace: uplink.Path, repository: uplink.Path): ...

    @uplink.returns.json
    @uplink.get("v2/{namespace}/{repository}/manifests/latest")
    def _get_manifest(self, namespace: uplink.Path, repository: uplink.Path): ...

    @uplink.get("v2/{namespace}/{repository}/blobs/{digest}")
    def _get_blob(self, namespace: uplink.Path, repository: uplink.Path, digest: uplink.Path): ...

    async def get_auth_requirements(self, namespace: str, repository: str) -> Optional[tuple[str, str, str]]:
        """Probe the manifest endpoint; return (realm, service, scope) from the
        Www-Authenticate header, or None when no auth is required."""
        response = await self._test_manifest(namespace, repository)
        if 200 <= response.status_code < 300:
            return None
        # Parse `key="value"` pairs out of the Www-Authenticate challenge.
        auth_regex = re.compile(r"([^\s,]+) ?[=] ?\"?([^\s,\"]+)\"?")
        auth_keys = dict(auth_regex.findall(response.headers["Www-Authenticate"]))
        return (auth_keys["realm"], auth_keys.get("service", None), auth_keys.get("scope", None))

    async def get_updated_datetime_iso(self, namespace: str, repository: str) -> str:
        """Return the image config blob's "created" timestamp (ISO string)."""
        manifest = await self._get_manifest(namespace, repository)
        config_digest = manifest["config"]["digest"]
        blob = json.loads(await (await self._get_blob(namespace, repository, digest=config_digest)).content.read())
        return blob["created"]
async def get_updated_images() -> list[str]:
    """Return the configured images whose `latest` tag changed within a day.

    Image specs with two path components are looked up on Docker Hub;
    three components means an explicit registry host. On any query failure
    an ERROR alert is sent and an empty list is returned immediately.
    """
    check_config = cvars.config.get().checks.docker_registry
    hub_client = DockerHubClient(base_url=check_config.hub_url, client=uplink.AiohttpClient())
    now = datetime.datetime.now(datetime.timezone.utc)
    updated_images = []
    for image in check_config.images:
        image_split = image.split("/")
        match len(image_split):
            case 2:  # "namespace/repository" -> Docker Hub
                namespace, repository = image_split
                try:
                    last_updated_iso = (await hub_client.get_latest_tag(namespace=namespace, repository=repository))[
                        "tag_last_pushed"
                    ]
                except Exception as exc:
                    await alerts.send_alert(
                        alerts.Alert(
                            alert_type=AlertType.ERROR,
                            message=f"Could not query Docker Hub: {repr(exc)}, see logs",
                            severity=Severity.CRITICAL,
                        )
                    )
                    logging.error(traceback.format_exc())
                    return []
            case 3:  # "registry/namespace/repository" -> explicit registry
                registry, namespace, repository = image_split
                registry_client = DockerRegistryClient(base_url=f"https://{registry}/", client=uplink.AiohttpClient())
                try:
                    # Acquire a bearer token first if the registry demands auth.
                    requirements = await registry_client.get_auth_requirements(namespace, repository)
                    if requirements is not None:
                        registry_authorizer = DockerRegistryAuthorizer(
                            base_url=requirements[0], client=uplink.AiohttpClient()
                        )
                        token = await registry_authorizer.get_token(requirements[1], requirements[2])
                        registry_client.session.headers["Authorization"] = f"Bearer {token}"
                    last_updated_iso = await registry_client.get_updated_datetime_iso(
                        namespace=namespace, repository=repository
                    )
                except Exception as exc:
                    await alerts.send_alert(
                        alerts.Alert(
                            alert_type=AlertType.ERROR,
                            message=f"Could not query Docker registry {registry}: {repr(exc)}, see logs",
                            severity=Severity.CRITICAL,
                        )
                    )
                    logging.error(traceback.format_exc())
                    return []
            case _:
                raise Exception(f"Invalid image spec: {image}")
        last_updated = datetime.datetime.fromisoformat(last_updated_iso)
        logging.info(f"Image {image} last updated at {last_updated}")
        # "Updated" means pushed within the last 24 hours; the caller is
        # expected to run this check roughly daily.
        if now - last_updated <= datetime.timedelta(days=1):
            updated_images.append(image)
    return updated_images

View file

@ -0,0 +1,167 @@
import subprocess
from dataclasses import dataclass
from enum import StrEnum
from psutil import cpu_percent, sensors_temperatures, virtual_memory
from alerting import alerts
from alerting.enum import AlertType, Severity
@dataclass
class TemperatureSensor:
    """One temperature reading plus its (optional) warning/critical limits."""

    sensor_type: str  # chip/driver name, e.g. psutil sensor group key
    sensor_label: str
    current_temp: float  # °C
    highest_temp: float | None = None  # warning threshold, °C
    critical_temp: float | None = None  # critical threshold, °C
@dataclass
class CpuSensor:
    """System-wide CPU load with fixed alerting thresholds (percent)."""

    current_load: float
    highest_load: float = 90  # warning threshold
    critical_load: float = 95  # critical threshold
@dataclass
class RamSensor:
    """Available RAM with fixed alerting thresholds (bytes)."""

    current_avail: int
    # NOTE(review): populated from psutil's `percent` field, which is percent
    # *used*, not percent available -- confirm naming intent.
    current_avail_percentage: float
    warning_avail: int = 4 * 1024**3  # 4 GiB
    critical_avail: int = 2 * 1024**3  # 2 GiB
class UPSStatus(StrEnum):
    """https://networkupstools.org/docs/developer-guide.chunked/new-drivers.html#_status_data"""

    ON_LINE = "OL"
    ON_BATTERY = "OB"
    BATTERY_LOW = "LB"
    BATTERY_HIGH = "HB"
    BATTERY_REPLACE = "RB"
    BATTERY_CHARGING = "CHRG"
    BATTERY_DISCHARGING = "DISCHRG"
    UPS_BYPASS = "BYPASS"
    """Battery and connected devices are not protected from power outage!"""
    UPS_OFFLINE = "OFF"
    UPS_OVERLOAD = "OVER"
    UPS_CALIBRATION = "CAL"
    UPS_TRIM = "TRIM"  # trimming overvoltage down to nominal
    UPS_BOOST = "BOOST"  # boosting undervoltage up to nominal
    UPS_FSD = "FSD"
@dataclass
class UPSSensor:
    """Readings parsed from `upsc`; thresholds keep defaults when not reported."""

    ups_status: list[UPSStatus] | None = None  # None until "ups.status" is seen
    battery_charge_percentage: int | None = None
    battery_warning_percentage: int = 20
    battery_critical_percentage: int = 10
    battery_runtime: int = 1000  # seconds
class Sensors:
    """Static accessors for host hardware readings (psutil + the upsc CLI)."""

    @staticmethod
    def get_temperatures() -> dict[str, list[TemperatureSensor]]:
        """Collect temperature readings, grouped by sensor chip type."""
        psutil_temp_sensors = sensors_temperatures()
        temp_sensors = {}
        for s_type, sensors in psutil_temp_sensors.items():
            if s_type not in temp_sensors.keys():
                temp_sensors[s_type] = []
            match (s_type):
                case "nvme":
                    # NVMe readings come with usable high/critical limits.
                    for sensor in sensors:
                        temp_sensors[s_type].append(
                            TemperatureSensor(
                                sensor_type=s_type,
                                sensor_label=sensor.label,
                                current_temp=sensor.current,
                                highest_temp=sensor.high,
                                critical_temp=sensor.critical,
                            )
                        )
                case "amdgpu":
                    temp_sensors[s_type].append(
                        TemperatureSensor(
                            sensor_type=s_type,
                            sensor_label="Integrated GPU",
                            current_temp=sensors[0].current,
                        )
                    )
                case "k10temp":
                    temp_sensors[s_type].append(
                        TemperatureSensor(
                            sensor_type=s_type,
                            sensor_label="AMD CPU",
                            current_temp=sensors[0].current,
                            critical_temp=95.0,  # hardcoded because we have R9 7900X
                        )
                    )
                case "nct6687":
                    # Map the chip's generic labels to physical locations.
                    # NOTE(review): an unmapped label raises KeyError, and the
                    # last two readings are silently skipped -- confirm intended.
                    lables = {
                        "AMD TSI Addr 98h": "CPU",
                        "Diode 0 (curr)": "System",
                        "Thermistor 15": "VRM MOSFET",
                        "Thermistor 1": "Platform Controller Hub (Peripherals)",
                        "Thermistor 16": "CPU Socket",
                    }
                    for sensor in sensors[:-2]:
                        real_label = lables[sensor.label]
                        temp_sensors[s_type].append(
                            TemperatureSensor(
                                sensor_type=s_type,
                                sensor_label=real_label,
                                current_temp=sensor.current,
                                # `or None` converts 0.0 placeholders to "no limit".
                                highest_temp=sensor.high or None,
                                critical_temp=sensor.critical or None,
                            )
                        )
        return temp_sensors

    @staticmethod
    def get_cpu() -> CpuSensor:
        """Return the current system-wide CPU load as a CpuSensor."""
        return CpuSensor(current_load=cpu_percent())

    @staticmethod
    def get_ram() -> RamSensor:
        """Return current RAM availability from psutil.virtual_memory()."""
        ram = virtual_memory()
        return RamSensor(current_avail=ram.available, current_avail_percentage=ram.percent)

    @staticmethod
    async def get_ups() -> None | UPSSensor:
        """Query the `upsc` CLI for UPS state.

        Sends a CRITICAL alert and returns None when upsc is not installed.
        """
        try:
            raw_data = subprocess.run(["upsc", "cp1300"], stdout=subprocess.PIPE, encoding="utf-8")
        except FileNotFoundError:
            await alerts.send_alert(
                alerts.Alert(
                    alert_type=AlertType.ERROR,
                    message="upsc is not installed!",
                    severity=Severity.CRITICAL,
                )
            )
            return None
        sensor_data = UPSSensor()
        # Each upsc output line is "key: value"; pick out the fields we use.
        for line in raw_data.stdout.splitlines():
            sensor, value = line.split(": ")[:2]
            match sensor:
                case "battery.charge":
                    sensor_data.battery_charge_percentage = int(value)
                case "battery.charge.low":
                    sensor_data.battery_critical_percentage = int(value)
                case "battery.charge.warning":
                    sensor_data.battery_warning_percentage = int(value)
                case "battery.runtime":
                    sensor_data.battery_runtime = int(value)
                case "ups.status":
                    # Status is a space-separated list of NUT status tokens.
                    sensor_data.ups_status = [UPSStatus(status) for status in value.split()]
                case _:
                    ...
        return sensor_data

68
archive-arch/misc/vuln.py Normal file
View file

@ -0,0 +1,68 @@
import json
import logging
import subprocess
import traceback
from dataclasses import dataclass
from enum import StrEnum
from typing import Optional
from alerting import alerts
from alerting.enum import AlertType, Severity
class Severity(StrEnum):
    """arch-audit severity levels.

    NOTE(review): this class shadows the Severity imported from
    alerting.enum above, so get_vulns() below builds alerts with *this*
    enum's members -- confirm that is intended.
    """

    LOW = "Low"
    MEDIUM = "Medium"
    HIGH = "High"
    CRITICAL = "Critical"
@dataclass
class Vulnerability:
    """One vulnerability entry from arch-audit's JSON output."""

    id: str  # AVG identifier (arch-audit "name")
    link: str  # security.archlinux.org URL for this entry
    vuln_type: str
    packages: list[str]  # affected package names
    severity: Severity
    fixed: Optional[str]  # fixed version, when an update exists
def _parse_arch_audit_output(output: str) -> list[Vulnerability]:
    """Convert `arch-audit --json` output into Vulnerability records."""
    return [
        Vulnerability(
            id=entry["name"],
            link=f"https://security.archlinux.org/{entry['name']}",
            vuln_type=entry["type"],
            packages=entry["packages"],
            severity=entry["severity"],
            fixed=entry["fixed"],
        )
        for entry in json.loads(output)
    ]
async def get_vulns() -> list[Vulnerability]:
    """Run `arch-audit --json` and return the parsed vulnerabilities.

    Sends an ERROR alert and returns [] when arch-audit is missing or fails.
    """
    try:
        arch_audit_output = subprocess.check_output(["arch-audit", "--json"])
    except FileNotFoundError:
        await alerts.send_alert(
            alerts.Alert(
                alert_type=AlertType.ERROR,
                message="arch-audit not installed!",
                # NOTE(review): `Severity` here resolves to the arch-audit
                # StrEnum defined above (it shadows alerting.enum.Severity);
                # confirm the alerting layer accepts it.
                severity=Severity.CRITICAL,
            )
        )
        return []
    except Exception as exc:
        await alerts.send_alert(
            alerts.Alert(
                alert_type=AlertType.ERROR,
                message=f"arch-audit failed to run: {repr(exc)}, see logs",
                severity=Severity.CRITICAL,
            )
        )
        logging.error(traceback.format_exc())
        return []
    return _parse_arch_audit_output(arch_audit_output)