---
# Playbook overview
#   Play 1 installs and configures NUT (Network UPS Tools) in standalone mode
#          for a USB-attached CyberPower UPS and verifies that upsd can talk
#          to it.
#   Play 2 registers a push monitor in Uptime Kuma and installs a systemd
#          timer that sends a heartbeat only while the UPS reports mains
#          power, so a missing heartbeat raises an alert.
#
# Variables such as ups_name, ups_driver, ups_port, ups_desc, ups_offdelay,
# ups_ondelay, ups_user, ups_password, uptime_kuma_username and
# uptime_kuma_password are not defined in this file; they are expected to
# come from the vars_files listed on each play.

- name: Setup NUT (Network UPS Tools) for CyberPower UPS
  hosts: nodito_host
  become: true
  vars_files:
    - ../../infra_vars.yml
    - nodito_vars.yml
    - nodito_secrets.yml

  tasks:
    # ------------------------------------------------------------------
    # Installation
    # ------------------------------------------------------------------
    - name: Install NUT packages
      apt:
        name:
          - nut
          - nut-client
          - nut-server
        state: present
        update_cache: true

    # ------------------------------------------------------------------
    # Verify UPS is detected
    # ------------------------------------------------------------------
    # grep exits non-zero when nothing matches; failed_when: false keeps the
    # play going so the dedicated "Fail if UPS not detected" task can report
    # a readable error instead.
    - name: Check if UPS is detected via USB
      shell: lsusb | grep -i cyber
      register: lsusb_output
      changed_when: false
      failed_when: false

    # stdout is always defined (empty string on no match), so default() needs
    # its second argument set to true to also replace empty values.
    - name: Display USB detection result
      debug:
        msg: "{{ lsusb_output.stdout | default('UPS not detected via USB - ensure it is plugged in', true) }}"

    - name: Fail if UPS not detected
      fail:
        msg: "CyberPower UPS not detected via USB. Ensure the USB cable is connected."
      when: lsusb_output.rc != 0

    # Always reported as changed: the commands do not tell us whether any
    # rule actually changed.
    - name: Reload udev rules for USB permissions
      shell: |
        udevadm control --reload-rules
        udevadm trigger --subsystem-match=usb --action=add
      changed_when: true

    # Builds "BUS/DEV" from the lsusb line and lists the matching device node
    # so the operator can check its group ownership.
    - name: Verify USB device has nut group permissions
      shell: |
        BUS_DEV=$(lsusb | grep -i cyber | grep -oP 'Bus \K\d+|Device \K\d+' | tr '\n' '/' | sed 's/\/$//')
        if [ -n "$BUS_DEV" ]; then
          BUS=$(echo "$BUS_DEV" | cut -d'/' -f1)
          DEV=$(echo "$BUS_DEV" | cut -d'/' -f2)
          ls -la "/dev/bus/usb/$BUS/$DEV"
        else
          echo "UPS device not found"
          exit 1
        fi
      register: usb_permissions
      changed_when: false

    - name: Display USB permissions
      debug:
        msg: "{{ usb_permissions.stdout }} (should show 'root nut', not 'root root')"

    # Informational only: a nut-scanner failure does not stop the play.
    - name: Scan for UPS with nut-scanner
      command: nut-scanner -U
      register: nut_scanner_output
      changed_when: false
      failed_when: false

    - name: Display nut-scanner result
      debug:
        msg: "{{ nut_scanner_output.stdout_lines }}"

    # ------------------------------------------------------------------
    # Configuration files
    # ------------------------------------------------------------------
    - name: Configure NUT mode (standalone)
      copy:
        dest: /etc/nut/nut.conf
        content: |
          # Managed by Ansible
          MODE=standalone
        owner: root
        group: nut
        mode: "0640"
      notify: Restart NUT services

    - name: Configure UPS device
      copy:
        dest: /etc/nut/ups.conf
        content: |
          # Managed by Ansible
          [{{ ups_name }}]
              driver = {{ ups_driver }}
              port = {{ ups_port }}
              desc = "{{ ups_desc }}"
              offdelay = {{ ups_offdelay }}
              ondelay = {{ ups_ondelay }}
        owner: root
        group: nut
        mode: "0640"
      notify: Restart NUT services

    - name: Configure upsd to listen on localhost
      copy:
        dest: /etc/nut/upsd.conf
        content: |
          # Managed by Ansible
          LISTEN 127.0.0.1 3493
        owner: root
        group: nut
        mode: "0640"
      notify: Restart NUT services

    # no_log keeps ups_password out of task output and --diff listings.
    - name: Configure upsd users
      copy:
        dest: /etc/nut/upsd.users
        content: |
          # Managed by Ansible
          [{{ ups_user }}]
              password = {{ ups_password }}
              upsmon master
        owner: root
        group: nut
        mode: "0640"
      no_log: true
      notify: Restart NUT services

    # no_log keeps ups_password (part of the MONITOR line) out of task output.
    - name: Configure upsmon
      copy:
        dest: /etc/nut/upsmon.conf
        content: |
          # Managed by Ansible
          MONITOR {{ ups_name }}@localhost 1 {{ ups_user }} {{ ups_password }} master
          MINSUPPLIES 1
          SHUTDOWNCMD "/sbin/shutdown -h +0"
          POLLFREQ 5
          POLLFREQALERT 5
          HOSTSYNC 15
          DEADTIME 15
          POWERDOWNFLAG /etc/killpower

          # Notifications
          NOTIFYMSG ONLINE "UPS %s on line power"
          NOTIFYMSG ONBATT "UPS %s on battery"
          NOTIFYMSG LOWBATT "UPS %s battery is low"
          NOTIFYMSG FSD "UPS %s: forced shutdown in progress"
          NOTIFYMSG COMMOK "Communications with UPS %s established"
          NOTIFYMSG COMMBAD "Communications with UPS %s lost"
          NOTIFYMSG SHUTDOWN "Auto logout and shutdown proceeding"
          NOTIFYMSG REPLBATT "UPS %s battery needs replacing"

          # Log all events to syslog
          NOTIFYFLAG ONLINE SYSLOG
          NOTIFYFLAG ONBATT SYSLOG
          NOTIFYFLAG LOWBATT SYSLOG
          NOTIFYFLAG FSD SYSLOG
          NOTIFYFLAG COMMOK SYSLOG
          NOTIFYFLAG COMMBAD SYSLOG
          NOTIFYFLAG SHUTDOWN SYSLOG
          NOTIFYFLAG REPLBATT SYSLOG
        owner: root
        group: nut
        mode: "0640"
      no_log: true
      notify: Restart NUT services

    # ------------------------------------------------------------------
    # Verify late-stage shutdown script
    # ------------------------------------------------------------------
    - name: Verify nutshutdown script exists
      stat:
        path: /lib/systemd/system-shutdown/nutshutdown
      register: nutshutdown_script

    - name: Warn if nutshutdown script is missing
      debug:
        msg: "WARNING: /lib/systemd/system-shutdown/nutshutdown not found. UPS may not cut power after shutdown."
      when: not nutshutdown_script.stat.exists

    # ------------------------------------------------------------------
    # Services
    # ------------------------------------------------------------------
    - name: Enable and start NUT driver enumerator
      systemd:
        name: nut-driver-enumerator
        enabled: true
        state: started

    - name: Enable and start NUT server
      systemd:
        name: nut-server
        enabled: true
        state: started

    - name: Enable and start NUT monitor
      systemd:
        name: nut-monitor
        enabled: true
        state: started

    # Handlers normally run at the end of the play, i.e. after the
    # verification below. Flushing here makes already-running services pick
    # up changed configuration before it is verified.
    - name: Apply pending NUT service restarts before verification
      meta: flush_handlers

    # ------------------------------------------------------------------
    # Verification
    # ------------------------------------------------------------------
    - name: Wait for NUT services to stabilize
      pause:
        seconds: 3

    - name: Verify NUT can communicate with UPS
      command: upsc {{ ups_name }}@localhost
      register: upsc_output
      changed_when: false
      failed_when: upsc_output.rc != 0

    - name: Display UPS status
      debug:
        msg: "{{ upsc_output.stdout_lines }}"

    # Each upsc call discards stderr, so a missing variable prints an empty
    # value instead of failing the task.
    - name: Get UPS status summary
      shell: |
        echo "Status: $(upsc {{ ups_name }}@localhost ups.status 2>/dev/null)"
        echo "Battery: $(upsc {{ ups_name }}@localhost battery.charge 2>/dev/null)%"
        echo "Runtime: $(upsc {{ ups_name }}@localhost battery.runtime 2>/dev/null)s"
        echo "Load: $(upsc {{ ups_name }}@localhost ups.load 2>/dev/null)%"
      register: ups_summary
      changed_when: false

    - name: Display UPS summary
      debug:
        msg: "{{ ups_summary.stdout_lines }}"

    - name: Verify low battery thresholds
      shell: |
        echo "Runtime threshold: $(upsc {{ ups_name }}@localhost battery.runtime.low 2>/dev/null)s"
        echo "Charge threshold: $(upsc {{ ups_name }}@localhost battery.charge.low 2>/dev/null)%"
      register: thresholds
      changed_when: false

    - name: Display low battery thresholds
      debug:
        msg: "{{ thresholds.stdout_lines }}"

  handlers:
    - name: Restart NUT services
      systemd:
        name: "{{ item }}"
        state: restarted
      loop:
        - nut-driver-enumerator
        - nut-server
        - nut-monitor

# NOTE(review): this play targets "nodito" while the play above targets
# "nodito_host" - confirm against the inventory that both are intended.
- name: Setup UPS Heartbeat Monitoring with Uptime Kuma
  hosts: nodito
  become: true
  vars_files:
    - ../../infra_vars.yml
    - ../../services_config.yml
    - ../../infra_secrets.yml
    - nodito_vars.yml
    - nodito_secrets.yml

  vars:
    # How often the systemd timer runs the heartbeat script.
    ups_heartbeat_interval_seconds: 60
    # Passed to Uptime Kuma as the push monitor's interval.
    ups_heartbeat_timeout_seconds: 120
    ups_heartbeat_retries: 1
    ups_monitoring_script_dir: /opt/ups-monitoring
    ups_monitoring_script_path: "{{ ups_monitoring_script_dir }}/ups_heartbeat.sh"
    ups_log_file: "{{ ups_monitoring_script_dir }}/ups_heartbeat.log"
    ups_systemd_service_name: ups-heartbeat
    uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}"
    ntfy_topic: "{{ service_settings.ntfy.topic }}"

  tasks:
    - name: Validate Uptime Kuma configuration
      assert:
        that:
          - uptime_kuma_api_url is defined
          - uptime_kuma_api_url != ""
          - uptime_kuma_username is defined
          - uptime_kuma_username != ""
          - uptime_kuma_password is defined
          - uptime_kuma_password != ""
        fail_msg: "uptime_kuma_api_url, uptime_kuma_username and uptime_kuma_password must be set"

    - name: Get hostname for monitor identification
      command: hostname
      register: host_name
      changed_when: false

    - name: Set monitor name and group based on hostname
      set_fact:
        monitor_name: "ups-{{ host_name.stdout }}"
        monitor_friendly_name: "UPS Status: {{ host_name.stdout }}"
        uptime_kuma_monitor_group: "{{ host_name.stdout }} - infra"

    # The helper script runs on the control node (delegate_to: localhost).
    # The always: section removes it even when monitor creation fails.
    - name: Provision the Uptime Kuma push monitor from the control node
      block:
        - name: Create Uptime Kuma UPS monitor setup script
          copy:
            dest: /tmp/setup_uptime_kuma_ups_monitor.py
            content: |
              #!/usr/bin/env python3
              """Create or update the UPS push monitor in Uptime Kuma.

              Arguments: api_url group_name monitor_name description interval
                         retries [ntfy_topic]
              Credentials are read from the UPTIME_KUMA_USERNAME and
              UPTIME_KUMA_PASSWORD environment variables so they do not
              appear in the process list.
              Prints a JSON object with monitor_id, push_token, group_name,
              group_id and monitor_name on stdout.
              """
              import json
              import os
              import sys

              from uptime_kuma_api import UptimeKumaApi


              def main():
                  api_url = sys.argv[1]
                  group_name = sys.argv[2]
                  monitor_name = sys.argv[3]
                  monitor_description = sys.argv[4]
                  interval = int(sys.argv[5])
                  retries = int(sys.argv[6])
                  ntfy_topic = sys.argv[7] if len(sys.argv) > 7 else "alerts"
                  username = os.environ['UPTIME_KUMA_USERNAME']
                  password = os.environ['UPTIME_KUMA_PASSWORD']

                  api = UptimeKumaApi(api_url, timeout=120, wait_events=2.0)
                  try:
                      api.login(username, password)

                      monitors = api.get_monitors()
                      notifications = api.get_notifications()

                      # Attach the ntfy notification only if one with the
                      # expected name already exists.
                      ntfy_notification = next((n for n in notifications if n.get('name') == f'ntfy ({ntfy_topic})'), None)
                      notification_id_list = {}
                      if ntfy_notification:
                          notification_id_list[ntfy_notification['id']] = True

                      group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None)
                      if not group:
                          api.add_monitor(type='group', name=group_name)
                          monitors = api.get_monitors()
                          group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None)

                      existing_monitor = next((m for m in monitors if m.get('name') == monitor_name), None)

                      monitor_data = {
                          'type': 'push',
                          'name': monitor_name,
                          'parent': group['id'],
                          'interval': interval,
                          'upsideDown': False,  # Normal heartbeat mode: receiving pings = healthy
                          'maxretries': retries,
                          'description': monitor_description,
                          'notificationIDList': notification_id_list
                      }

                      if existing_monitor:
                          api.edit_monitor(existing_monitor['id'], **monitor_data)
                      else:
                          api.add_monitor(**monitor_data)

                      # Re-read the monitor list to obtain the id and push token.
                      monitors = api.get_monitors()
                      monitor = next((m for m in monitors if m.get('name') == monitor_name), None)

                      result = {
                          'monitor_id': monitor['id'],
                          'push_token': monitor['pushToken'],
                          'group_name': group_name,
                          'group_id': group['id'],
                          'monitor_name': monitor_name
                      }
                      print(json.dumps(result))
                  finally:
                      # Close the API session even when a call above raised.
                      api.disconnect()


              if __name__ == '__main__':
                  main()
            mode: "0755"
          delegate_to: localhost
          become: false

        # argv form avoids shell-style splitting of values containing spaces
        # or quotes; credentials travel through the environment instead of
        # the command line.
        - name: Run Uptime Kuma UPS monitor setup script
          command:
            argv:
              - "{{ ansible_playbook_python }}"
              - /tmp/setup_uptime_kuma_ups_monitor.py
              - "{{ uptime_kuma_api_url }}"
              - "{{ uptime_kuma_monitor_group }}"
              - "{{ monitor_name }}"
              - "{{ monitor_friendly_name }} - Alerts when UPS goes on battery or loses communication"
              - "{{ ups_heartbeat_timeout_seconds }}"
              - "{{ ups_heartbeat_retries }}"
              - "{{ ntfy_topic }}"
          environment:
            UPTIME_KUMA_USERNAME: "{{ uptime_kuma_username }}"
            UPTIME_KUMA_PASSWORD: "{{ uptime_kuma_password }}"
          register: monitor_setup_result
          delegate_to: localhost
          become: false
          changed_when: false

      always:
        - name: Clean up temporary Uptime Kuma setup script
          file:
            path: /tmp/setup_uptime_kuma_ups_monitor.py
            state: absent
          delegate_to: localhost
          become: false

    - name: Parse monitor setup result
      set_fact:
        monitor_info_parsed: "{{ monitor_setup_result.stdout | from_json }}"

    - name: Set push URL as fact
      set_fact:
        uptime_kuma_ups_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}"

    - name: Install required packages for UPS monitoring
      package:
        name:
          - curl
        state: present

    - name: Create monitoring script directory
      file:
        path: "{{ ups_monitoring_script_dir }}"
        state: directory
        owner: root
        group: root
        mode: "0755"

    - name: Create UPS heartbeat monitoring script
      copy:
        dest: "{{ ups_monitoring_script_path }}"
        content: |
          #!/bin/bash
          # UPS Heartbeat Monitoring Script
          # Sends heartbeat to Uptime Kuma only when UPS is on mains power
          # When on battery or communication lost, no heartbeat is sent (triggers timeout alert)
          # Exit status: 0 when a heartbeat was delivered, 1 otherwise.

          LOG_FILE="{{ ups_log_file }}"
          UPTIME_KUMA_URL="{{ uptime_kuma_ups_push_url }}"
          UPS_NAME="{{ ups_name }}"

          log_message() {
              echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
          }

          send_heartbeat() {
              local message="$1"
              local response http_code

              # curl percent-encodes the query values itself (-G appends them
              # to the URL), which avoids hand-rolled escaping mistakes.
              # --max-time keeps a hung request from blocking the next run.
              response=$(curl -s --max-time 30 -G -w "\n%{http_code}" \
                  --data-urlencode "status=up" \
                  --data-urlencode "msg=$message" \
                  "$UPTIME_KUMA_URL" 2>&1)
              http_code=$(echo "$response" | tail -n1)

              if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
                  log_message "Heartbeat sent: $message (HTTP $http_code)"
                  return 0
              else
                  log_message "ERROR: Failed to send heartbeat (HTTP $http_code)"
                  return 1
              fi
          }

          main() {
              local status charge runtime load

              status=$(upsc ${UPS_NAME}@localhost ups.status 2>/dev/null)

              if [ -z "$status" ]; then
                  log_message "ERROR: Cannot communicate with UPS - NOT sending heartbeat"
                  exit 1
              fi

              charge=$(upsc ${UPS_NAME}@localhost battery.charge 2>/dev/null)
              runtime=$(upsc ${UPS_NAME}@localhost battery.runtime 2>/dev/null)
              load=$(upsc ${UPS_NAME}@localhost ups.load 2>/dev/null)

              if [[ "$status" == *"OL"* ]]; then
                  local message="UPS on mains (charge=${charge}% runtime=${runtime}s load=${load}%)"
                  # Propagate delivery failures so systemd and the playbook
                  # self-test can see them.
                  if send_heartbeat "$message"; then
                      exit 0
                  fi
                  exit 1
              else
                  log_message "UPS not on mains power (status=$status) - NOT sending heartbeat"
                  exit 1
              fi
          }

          main
        owner: root
        group: root
        mode: "0755"

    - name: Create systemd service for UPS heartbeat
      copy:
        dest: "/etc/systemd/system/{{ ups_systemd_service_name }}.service"
        content: |
          [Unit]
          Description=UPS Heartbeat Monitor
          After=network.target nut-monitor.service

          [Service]
          Type=oneshot
          ExecStart={{ ups_monitoring_script_path }}
          User=root
          StandardOutput=journal
          StandardError=journal

          [Install]
          WantedBy=multi-user.target
        owner: root
        group: root
        mode: "0644"

    - name: Create systemd timer for UPS heartbeat
      copy:
        dest: "/etc/systemd/system/{{ ups_systemd_service_name }}.timer"
        content: |
          [Unit]
          Description=Run UPS Heartbeat Monitor every {{ ups_heartbeat_interval_seconds }} seconds
          Requires={{ ups_systemd_service_name }}.service

          [Timer]
          OnBootSec=1min
          OnUnitActiveSec={{ ups_heartbeat_interval_seconds }}sec
          Persistent=true

          [Install]
          WantedBy=timers.target
        owner: root
        group: root
        mode: "0644"

    - name: Reload systemd daemon
      systemd:
        daemon_reload: true

    - name: Enable and start UPS heartbeat timer
      systemd:
        name: "{{ ups_systemd_service_name }}.timer"
        enabled: true
        state: started

    # failed_when: false lets the assert below report the failure with its
    # own message; otherwise this task would abort first on a non-zero rc.
    - name: Test UPS heartbeat script
      command: "{{ ups_monitoring_script_path }}"
      register: script_test
      changed_when: false
      failed_when: false

    - name: Verify script execution
      assert:
        that:
          - script_test.rc == 0
        fail_msg: "UPS heartbeat script failed - check UPS status, communication and the Uptime Kuma push URL"

    - name: Display monitoring configuration
      debug:
        msg:
          - "UPS Monitoring configured successfully"
          - ""
          - "NUT Configuration:"
          - "  UPS Name: {{ ups_name }}"
          - "  UPS Description: {{ ups_desc }}"
          - "  Off Delay: {{ ups_offdelay }}s (time after shutdown before UPS cuts power)"
          - "  On Delay: {{ ups_ondelay }}s (time after mains returns before UPS restores power)"
          - ""
          - "Uptime Kuma Monitoring:"
          - "  Monitor Name: {{ monitor_friendly_name }}"
          - "  Monitor Group: {{ uptime_kuma_monitor_group }}"
          - "  Push URL: {{ uptime_kuma_ups_push_url }}"
          - "  Heartbeat Interval: {{ ups_heartbeat_interval_seconds }}s"
          - "  Timeout: {{ ups_heartbeat_timeout_seconds }}s"
          - ""
          - "Scripts and Services:"
          - "  Script: {{ ups_monitoring_script_path }}"
          - "  Log: {{ ups_log_file }}"
          - "  Service: {{ ups_systemd_service_name }}.service"
          - "  Timer: {{ ups_systemd_service_name }}.timer"