diff --git a/ansible/infra/910_docker_playbook.yml b/ansible/infra/910_docker_playbook.yml index 8e8e430..f137b6a 100644 --- a/ansible/infra/910_docker_playbook.yml +++ b/ansible/infra/910_docker_playbook.yml @@ -25,6 +25,7 @@ name: - ca-certificates - curl + - gnupg state: present - name: Create directory for Docker GPG key diff --git a/ansible/infra/nodito/32_zfs_pool_setup_playbook.yml b/ansible/infra/nodito/32_zfs_pool_setup_playbook.yml index 4ff0ed4..cb72328 100644 --- a/ansible/infra/nodito/32_zfs_pool_setup_playbook.yml +++ b/ansible/infra/nodito/32_zfs_pool_setup_playbook.yml @@ -170,3 +170,499 @@ fail: msg: "ZFS pool {{ zfs_pool_name }} is not in a healthy state" when: "'ONLINE' not in final_zfs_status.stdout" + +- name: Setup ZFS Pool Health Monitoring and Monthly Scrubs + hosts: nodito + become: true + vars_files: + - ../../infra_vars.yml + - ../../services_config.yml + - ../../infra_secrets.yml + - nodito_vars.yml + + vars: + zfs_check_interval_seconds: 86400 # 24 hours + zfs_check_timeout_seconds: 90000 # ~25 hours (interval + buffer) + zfs_check_retries: 1 + zfs_monitoring_script_dir: /opt/zfs-monitoring + zfs_monitoring_script_path: "{{ zfs_monitoring_script_dir }}/zfs_health_monitor.sh" + zfs_log_file: "{{ zfs_monitoring_script_dir }}/zfs_health_monitor.log" + zfs_systemd_health_service_name: zfs-health-monitor + zfs_systemd_scrub_service_name: zfs-monthly-scrub + uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" + + tasks: + - name: Validate Uptime Kuma configuration + assert: + that: + - uptime_kuma_api_url is defined + - uptime_kuma_api_url != "" + - uptime_kuma_username is defined + - uptime_kuma_username != "" + - uptime_kuma_password is defined + - uptime_kuma_password != "" + fail_msg: "uptime_kuma_api_url, uptime_kuma_username and uptime_kuma_password must be set" + + - name: Get hostname for monitor identification + command: hostname + register: host_name + 
changed_when: false + + - name: Set monitor name and group based on hostname + set_fact: + monitor_name: "zfs-health-{{ host_name.stdout }}" + monitor_friendly_name: "ZFS Pool Health: {{ host_name.stdout }}" + uptime_kuma_monitor_group: "{{ host_name.stdout }} - infra" + + - name: Create Uptime Kuma ZFS health monitor setup script + copy: + dest: /tmp/setup_uptime_kuma_zfs_monitor.py + content: | + #!/usr/bin/env python3 + import sys + import json + from uptime_kuma_api import UptimeKumaApi + + def main(): + api_url = sys.argv[1] + username = sys.argv[2] + password = sys.argv[3] + group_name = sys.argv[4] + monitor_name = sys.argv[5] + monitor_description = sys.argv[6] + interval = int(sys.argv[7]) + retries = int(sys.argv[8]) + ntfy_topic = sys.argv[9] if len(sys.argv) > 9 else "alerts" + + api = UptimeKumaApi(api_url, timeout=120, wait_events=2.0) + api.login(username, password) + + # Get all monitors + monitors = api.get_monitors() + + # Get all notifications and find ntfy notification + notifications = api.get_notifications() + ntfy_notification = next((n for n in notifications if n.get('name') == f'ntfy ({ntfy_topic})'), None) + notification_id_list = {} + if ntfy_notification: + notification_id_list[ntfy_notification['id']] = True + + # Find or create group + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + if not group: + group_result = api.add_monitor(type='group', name=group_name) + # Refresh to get the full group object with id + monitors = api.get_monitors() + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + + # Find or create/update push monitor + existing_monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + monitor_data = { + 'type': 'push', + 'name': monitor_name, + 'parent': group['id'], + 'interval': interval, + 'upsideDown': False, # Normal heartbeat mode: receiving pings = healthy + 'maxretries': retries, + 
'description': monitor_description, + 'notificationIDList': notification_id_list + } + + if existing_monitor: + monitor = api.edit_monitor(existing_monitor['id'], **monitor_data) + # Refresh to get the full monitor object with pushToken + monitors = api.get_monitors() + monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + else: + monitor_result = api.add_monitor(**monitor_data) + # Refresh to get the full monitor object with pushToken + monitors = api.get_monitors() + monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + # Output result as JSON + result = { + 'monitor_id': monitor['id'], + 'push_token': monitor['pushToken'], + 'group_name': group_name, + 'group_id': group['id'], + 'monitor_name': monitor_name + } + print(json.dumps(result)) + + api.disconnect() + + if __name__ == '__main__': + main() + mode: '0755' + delegate_to: localhost + become: no + + - name: Run Uptime Kuma ZFS monitor setup script + command: > + {{ ansible_playbook_python }} + /tmp/setup_uptime_kuma_zfs_monitor.py + "{{ uptime_kuma_api_url }}" + "{{ uptime_kuma_username }}" + "{{ uptime_kuma_password }}" + "{{ uptime_kuma_monitor_group }}" + "{{ monitor_name }}" + "{{ monitor_friendly_name }} - Daily health check for pool {{ zfs_pool_name }}" + "{{ zfs_check_timeout_seconds }}" + "{{ zfs_check_retries }}" + "{{ ntfy_topic }}" + register: monitor_setup_result + delegate_to: localhost + become: no + changed_when: false + + - name: Parse monitor setup result + set_fact: + monitor_info_parsed: "{{ monitor_setup_result.stdout | from_json }}" + + - name: Set push URL and monitor ID as facts + set_fact: + uptime_kuma_zfs_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}" + uptime_kuma_monitor_id: "{{ monitor_info_parsed.monitor_id }}" + + - name: Install required packages for ZFS monitoring + package: + name: + - curl + - jq + state: present + + - name: Create monitoring script directory + file: + path: "{{ 
zfs_monitoring_script_dir }}" + state: directory + owner: root + group: root + mode: '0755' + + - name: Create ZFS health monitoring script + copy: + dest: "{{ zfs_monitoring_script_path }}" + content: | + #!/bin/bash + + # ZFS Pool Health Monitoring Script + # Checks ZFS pool health using JSON output and sends heartbeat to Uptime Kuma if healthy + # If any issues detected, does NOT send heartbeat (triggers timeout alert) + + LOG_FILE="{{ zfs_log_file }}" + UPTIME_KUMA_URL="{{ uptime_kuma_zfs_push_url }}" + POOL_NAME="{{ zfs_pool_name }}" + HOSTNAME=$(hostname) + + # Function to log messages + log_message() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + # Function to check pool health using JSON output + check_pool_health() { + local pool="$1" + local issues_found=0 + + # Get pool status as JSON + local pool_json + pool_json=$(zpool status -j "$pool" 2>&1) + + if [ $? -ne 0 ]; then + log_message "ERROR: Failed to get pool status for $pool" + log_message " -> $pool_json" + return 1 + fi + + # Check 1: Pool state must be ONLINE + local pool_state + pool_state=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].state') + + if [ "$pool_state" != "ONLINE" ]; then + log_message "ISSUE: Pool state is $pool_state (expected ONLINE)" + issues_found=1 + else + log_message "OK: Pool state is ONLINE" + fi + + # Check 2: Check all vdevs and devices for non-ONLINE states + local bad_states + bad_states=$(echo "$pool_json" | jq -r --arg pool "$pool" ' + .pools[$pool].vdevs[] | + .. | objects | + select(.state? 
and .state != "ONLINE") | + "\(.name // "unknown"): \(.state)" + ' 2>/dev/null) + + if [ -n "$bad_states" ]; then + log_message "ISSUE: Found devices not in ONLINE state:" + echo "$bad_states" | while read -r line; do + log_message " -> $line" + done + issues_found=1 + else + log_message "OK: All devices are ONLINE" + fi + + # Check 3: Check for resilvering in progress + local scan_function scan_state + scan_function=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.function // "NONE"') + scan_state=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.state // "NONE"') + + if [ "$scan_function" = "RESILVER" ] && [ "$scan_state" = "SCANNING" ]; then + local resilver_progress + resilver_progress=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.issued // "unknown"') + log_message "ISSUE: Pool is currently resilvering (disk reconstruction in progress) - ${resilver_progress} processed" + issues_found=1 + fi + + # Check 4: Check for read/write/checksum errors on all devices + # Note: ZFS JSON output has error counts as strings, so convert to numbers for comparison + local devices_with_errors + devices_with_errors=$(echo "$pool_json" | jq -r --arg pool "$pool" ' + .pools[$pool].vdevs[] | + .. | objects | + select(.name? 
and ((.read_errors // "0" | tonumber) > 0 or (.write_errors // "0" | tonumber) > 0 or (.checksum_errors // "0" | tonumber) > 0)) | + "\(.name): read=\(.read_errors // 0) write=\(.write_errors // 0) cksum=\(.checksum_errors // 0)" + ' 2>/dev/null) + + if [ -n "$devices_with_errors" ]; then + log_message "ISSUE: Found devices with I/O errors:" + echo "$devices_with_errors" | while read -r line; do + log_message " -> $line" + done + issues_found=1 + else + log_message "OK: No read/write/checksum errors detected" + fi + + # Check 5: Check for scan errors (from last scrub/resilver) + local scan_errors + scan_errors=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.errors // "0"') + + if [ "$scan_errors" != "0" ] && [ "$scan_errors" != "null" ] && [ -n "$scan_errors" ]; then + log_message "ISSUE: Last scan reported $scan_errors errors" + issues_found=1 + else + log_message "OK: No scan errors" + fi + + return $issues_found + } + + # Function to get last scrub info for status message + get_scrub_info() { + local pool="$1" + local pool_json + pool_json=$(zpool status -j "$pool" 2>/dev/null) + + local scan_func scan_state scan_start + scan_func=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.function // "NONE"') + scan_state=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.state // "NONE"') + scan_start=$(echo "$pool_json" | jq -r --arg pool "$pool" '.pools[$pool].scan_stats.start_time // ""') + + if [ "$scan_func" = "SCRUB" ] && [ "$scan_state" = "SCANNING" ]; then + echo "scrub in progress (started $scan_start)" + elif [ "$scan_func" = "SCRUB" ] && [ -n "$scan_start" ]; then + echo "last scrub: $scan_start" + else + echo "no scrub history" + fi + } + + # Function to send heartbeat to Uptime Kuma + send_heartbeat() { + local message="$1" + + log_message "Sending heartbeat to Uptime Kuma: $message" + + # URL encode the message + local encoded_message + encoded_message=$(printf '%s\n' "$message" | sed 
's/ /%20/g; s/(/%28/g; s/)/%29/g; s/:/%3A/g; s/\//%2F/g') + + local response http_code + response=$(curl -s -w "\n%{http_code}" "$UPTIME_KUMA_URL?status=up&msg=$encoded_message" 2>&1) + http_code=$(echo "$response" | tail -n1) + + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + log_message "Heartbeat sent successfully (HTTP $http_code)" + return 0 + else + log_message "ERROR: Failed to send heartbeat (HTTP $http_code)" + return 1 + fi + } + + # Main health check logic + main() { + log_message "==========================================" + log_message "Starting ZFS health check for pool: $POOL_NAME on $HOSTNAME" + + # Run all health checks + if check_pool_health "$POOL_NAME"; then + # All checks passed - send heartbeat + local scrub_info + scrub_info=$(get_scrub_info "$POOL_NAME") + + local message="Pool $POOL_NAME healthy ($scrub_info)" + send_heartbeat "$message" + + log_message "Health check completed: ALL OK" + exit 0 + else + # Issues found - do NOT send heartbeat (will trigger timeout alert) + log_message "Health check completed: ISSUES DETECTED - NOT sending heartbeat" + log_message "Uptime Kuma will alert after timeout due to missing heartbeat" + exit 1 + fi + } + + # Run main function + main + owner: root + group: root + mode: '0755' + + - name: Create systemd service for ZFS health monitoring + copy: + dest: "/etc/systemd/system/{{ zfs_systemd_health_service_name }}.service" + content: | + [Unit] + Description=ZFS Pool Health Monitor + After=zfs.target network.target + + [Service] + Type=oneshot + ExecStart={{ zfs_monitoring_script_path }} + User=root + StandardOutput=journal + StandardError=journal + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + - name: Create systemd timer for daily ZFS health monitoring + copy: + dest: "/etc/systemd/system/{{ zfs_systemd_health_service_name }}.timer" + content: | + [Unit] + Description=Run ZFS Pool Health Monitor daily + Requires={{ 
zfs_systemd_health_service_name }}.service + + [Timer] + OnBootSec=5min + OnUnitActiveSec={{ zfs_check_interval_seconds }}sec + Persistent=true + + [Install] + WantedBy=timers.target + owner: root + group: root + mode: '0644' + + - name: Create systemd service for ZFS monthly scrub + copy: + dest: "/etc/systemd/system/{{ zfs_systemd_scrub_service_name }}.service" + content: | + [Unit] + Description=ZFS Monthly Scrub for {{ zfs_pool_name }} + After=zfs.target + + [Service] + Type=oneshot + ExecStart=/sbin/zpool scrub {{ zfs_pool_name }} + User=root + StandardOutput=journal + StandardError=journal + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + - name: Create systemd timer for monthly ZFS scrub + copy: + dest: "/etc/systemd/system/{{ zfs_systemd_scrub_service_name }}.timer" + content: | + [Unit] + Description=Run ZFS Scrub on last day of every month at 4:00 AM + Requires={{ zfs_systemd_scrub_service_name }}.service + + [Timer] + OnCalendar=*-*~01 04:00:00 + Persistent=true + + [Install] + WantedBy=timers.target + owner: root + group: root + mode: '0644' + + - name: Reload systemd daemon + systemd: + daemon_reload: yes + + - name: Enable and start ZFS health monitoring timer + systemd: + name: "{{ zfs_systemd_health_service_name }}.timer" + enabled: yes + state: started + + - name: Enable and start ZFS monthly scrub timer + systemd: + name: "{{ zfs_systemd_scrub_service_name }}.timer" + enabled: yes + state: started + + - name: Test ZFS health monitoring script + command: "{{ zfs_monitoring_script_path }}" + register: script_test + changed_when: false + + - name: Verify script execution + assert: + that: + - script_test.rc == 0 + fail_msg: "ZFS health monitoring script failed - check pool health" + + - name: Display monitoring configuration + debug: + msg: | + ✓ ZFS Pool Health Monitoring deployed successfully! 
+ + Monitor Name: {{ monitor_friendly_name }} + Monitor Group: {{ uptime_kuma_monitor_group }} + Pool Name: {{ zfs_pool_name }} + + Health Check: + - Frequency: Every {{ zfs_check_interval_seconds }} seconds (24 hours) + - Timeout: {{ zfs_check_timeout_seconds }} seconds (~25 hours) + - Script: {{ zfs_monitoring_script_path }} + - Log: {{ zfs_log_file }} + - Service: {{ zfs_systemd_health_service_name }}.service + - Timer: {{ zfs_systemd_health_service_name }}.timer + + Monthly Scrub: + - Schedule: Last day of month at 4:00 AM + - Service: {{ zfs_systemd_scrub_service_name }}.service + - Timer: {{ zfs_systemd_scrub_service_name }}.timer + + Conditions monitored: + - Pool state (must be ONLINE) + - Device states (no DEGRADED/FAULTED/OFFLINE/UNAVAIL) + - Resilver status (alerts if resilvering) + - Read/Write/Checksum errors + - Scrub errors + + - name: Clean up temporary Uptime Kuma setup script + file: + path: /tmp/setup_uptime_kuma_zfs_monitor.py + state: absent + delegate_to: localhost + become: no diff --git a/ansible/infra/nodito/34_nut_ups_setup_playbook.yml b/ansible/infra/nodito/34_nut_ups_setup_playbook.yml new file mode 100644 index 0000000..02468d5 --- /dev/null +++ b/ansible/infra/nodito/34_nut_ups_setup_playbook.yml @@ -0,0 +1,569 @@ +- name: Setup NUT (Network UPS Tools) for CyberPower UPS + hosts: nodito_host + become: true + vars_files: + - ../../infra_vars.yml + - nodito_vars.yml + - nodito_secrets.yml + + tasks: + # ------------------------------------------------------------------ + # Installation + # ------------------------------------------------------------------ + - name: Install NUT packages + apt: + name: + - nut + - nut-client + - nut-server + state: present + update_cache: true + + # ------------------------------------------------------------------ + # Verify UPS is detected + # ------------------------------------------------------------------ + - name: Check if UPS is detected via USB + shell: lsusb | grep -i cyber + register: 
lsusb_output + changed_when: false + failed_when: false + + - name: Display USB detection result + debug: + msg: "{{ lsusb_output.stdout | default('UPS not detected via USB - ensure it is plugged in') }}" + + - name: Fail if UPS not detected + fail: + msg: "CyberPower UPS not detected via USB. Ensure the USB cable is connected." + when: lsusb_output.rc != 0 + + - name: Reload udev rules for USB permissions + shell: | + udevadm control --reload-rules + udevadm trigger --subsystem-match=usb --action=add + changed_when: true + + - name: Verify USB device has nut group permissions + shell: | + BUS_DEV=$(lsusb | grep -i cyber | grep -oP 'Bus \K\d+|Device \K\d+' | tr '\n' '/' | sed 's/\/$//') + if [ -n "$BUS_DEV" ]; then + BUS=$(echo $BUS_DEV | cut -d'/' -f1) + DEV=$(echo $BUS_DEV | cut -d'/' -f2) + ls -la /dev/bus/usb/$BUS/$DEV + else + echo "UPS device not found" + exit 1 + fi + register: usb_permissions + changed_when: false + + - name: Display USB permissions + debug: + msg: "{{ usb_permissions.stdout }} (should show 'root nut', not 'root root')" + + - name: Scan for UPS with nut-scanner + command: nut-scanner -U + register: nut_scanner_output + changed_when: false + failed_when: false + + - name: Display nut-scanner result + debug: + msg: "{{ nut_scanner_output.stdout_lines }}" + + # ------------------------------------------------------------------ + # Configuration files + # ------------------------------------------------------------------ + - name: Configure NUT mode (standalone) + copy: + dest: /etc/nut/nut.conf + content: | + # Managed by Ansible + MODE=standalone + owner: root + group: nut + mode: "0640" + notify: Restart NUT services + + - name: Configure UPS device + copy: + dest: /etc/nut/ups.conf + content: | + # Managed by Ansible + [{{ ups_name }}] + driver = {{ ups_driver }} + port = {{ ups_port }} + desc = "{{ ups_desc }}" + offdelay = {{ ups_offdelay }} + ondelay = {{ ups_ondelay }} + owner: root + group: nut + mode: "0640" + notify: Restart NUT 
services + + - name: Configure upsd to listen on localhost + copy: + dest: /etc/nut/upsd.conf + content: | + # Managed by Ansible + LISTEN 127.0.0.1 3493 + owner: root + group: nut + mode: "0640" + notify: Restart NUT services + + - name: Configure upsd users + copy: + dest: /etc/nut/upsd.users + content: | + # Managed by Ansible + [{{ ups_user }}] + password = {{ ups_password }} + upsmon master + owner: root + group: nut + mode: "0640" + notify: Restart NUT services + + - name: Configure upsmon + copy: + dest: /etc/nut/upsmon.conf + content: | + # Managed by Ansible + MONITOR {{ ups_name }}@localhost 1 {{ ups_user }} {{ ups_password }} master + + MINSUPPLIES 1 + SHUTDOWNCMD "/sbin/shutdown -h +0" + POLLFREQ 5 + POLLFREQALERT 5 + HOSTSYNC 15 + DEADTIME 15 + POWERDOWNFLAG /etc/killpower + + # Notifications + NOTIFYMSG ONLINE "UPS %s on line power" + NOTIFYMSG ONBATT "UPS %s on battery" + NOTIFYMSG LOWBATT "UPS %s battery is low" + NOTIFYMSG FSD "UPS %s: forced shutdown in progress" + NOTIFYMSG COMMOK "Communications with UPS %s established" + NOTIFYMSG COMMBAD "Communications with UPS %s lost" + NOTIFYMSG SHUTDOWN "Auto logout and shutdown proceeding" + NOTIFYMSG REPLBATT "UPS %s battery needs replacing" + + # Log all events to syslog + NOTIFYFLAG ONLINE SYSLOG + NOTIFYFLAG ONBATT SYSLOG + NOTIFYFLAG LOWBATT SYSLOG + NOTIFYFLAG FSD SYSLOG + NOTIFYFLAG COMMOK SYSLOG + NOTIFYFLAG COMMBAD SYSLOG + NOTIFYFLAG SHUTDOWN SYSLOG + NOTIFYFLAG REPLBATT SYSLOG + owner: root + group: nut + mode: "0640" + notify: Restart NUT services + + # ------------------------------------------------------------------ + # Verify late-stage shutdown script + # ------------------------------------------------------------------ + - name: Verify nutshutdown script exists + stat: + path: /lib/systemd/system-shutdown/nutshutdown + register: nutshutdown_script + + - name: Warn if nutshutdown script is missing + debug: + msg: "WARNING: /lib/systemd/system-shutdown/nutshutdown not found. 
UPS may not cut power after shutdown." + when: not nutshutdown_script.stat.exists + + # ------------------------------------------------------------------ + # Services + # ------------------------------------------------------------------ + - name: Enable and start NUT driver enumerator + systemd: + name: nut-driver-enumerator + enabled: true + state: started + + - name: Enable and start NUT server + systemd: + name: nut-server + enabled: true + state: started + + - name: Enable and start NUT monitor + systemd: + name: nut-monitor + enabled: true + state: started + + # ------------------------------------------------------------------ + # Verification + # ------------------------------------------------------------------ + - name: Wait for NUT services to stabilize + pause: + seconds: 3 + + - name: Verify NUT can communicate with UPS + command: upsc {{ ups_name }}@localhost + register: upsc_output + changed_when: false + failed_when: upsc_output.rc != 0 + + - name: Display UPS status + debug: + msg: "{{ upsc_output.stdout_lines }}" + + - name: Get UPS status summary + shell: | + echo "Status: $(upsc {{ ups_name }}@localhost ups.status 2>/dev/null)" + echo "Battery: $(upsc {{ ups_name }}@localhost battery.charge 2>/dev/null)%" + echo "Runtime: $(upsc {{ ups_name }}@localhost battery.runtime 2>/dev/null)s" + echo "Load: $(upsc {{ ups_name }}@localhost ups.load 2>/dev/null)%" + register: ups_summary + changed_when: false + + - name: Display UPS summary + debug: + msg: "{{ ups_summary.stdout_lines }}" + + - name: Verify low battery thresholds + shell: | + echo "Runtime threshold: $(upsc {{ ups_name }}@localhost battery.runtime.low 2>/dev/null)s" + echo "Charge threshold: $(upsc {{ ups_name }}@localhost battery.charge.low 2>/dev/null)%" + register: thresholds + changed_when: false + + - name: Display low battery thresholds + debug: + msg: "{{ thresholds.stdout_lines }}" + + handlers: + - name: Restart NUT services + systemd: + name: "{{ item }}" + state: restarted + 
loop: + - nut-driver-enumerator + - nut-server + - nut-monitor + + +- name: Setup UPS Heartbeat Monitoring with Uptime Kuma + hosts: nodito + become: true + vars_files: + - ../../infra_vars.yml + - ../../services_config.yml + - ../../infra_secrets.yml + - nodito_vars.yml + - nodito_secrets.yml + + vars: + ups_heartbeat_interval_seconds: 60 + ups_heartbeat_timeout_seconds: 120 + ups_heartbeat_retries: 1 + ups_monitoring_script_dir: /opt/ups-monitoring + ups_monitoring_script_path: "{{ ups_monitoring_script_dir }}/ups_heartbeat.sh" + ups_log_file: "{{ ups_monitoring_script_dir }}/ups_heartbeat.log" + ups_systemd_service_name: ups-heartbeat + uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" + + tasks: + - name: Validate Uptime Kuma configuration + assert: + that: + - uptime_kuma_api_url is defined + - uptime_kuma_api_url != "" + - uptime_kuma_username is defined + - uptime_kuma_username != "" + - uptime_kuma_password is defined + - uptime_kuma_password != "" + fail_msg: "uptime_kuma_api_url, uptime_kuma_username and uptime_kuma_password must be set" + + - name: Get hostname for monitor identification + command: hostname + register: host_name + changed_when: false + + - name: Set monitor name and group based on hostname + set_fact: + monitor_name: "ups-{{ host_name.stdout }}" + monitor_friendly_name: "UPS Status: {{ host_name.stdout }}" + uptime_kuma_monitor_group: "{{ host_name.stdout }} - infra" + + - name: Create Uptime Kuma UPS monitor setup script + copy: + dest: /tmp/setup_uptime_kuma_ups_monitor.py + content: | + #!/usr/bin/env python3 + import sys + import json + from uptime_kuma_api import UptimeKumaApi + + def main(): + api_url = sys.argv[1] + username = sys.argv[2] + password = sys.argv[3] + group_name = sys.argv[4] + monitor_name = sys.argv[5] + monitor_description = sys.argv[6] + interval = int(sys.argv[7]) + retries = int(sys.argv[8]) + ntfy_topic = sys.argv[9] if len(sys.argv) 
> 9 else "alerts" + + api = UptimeKumaApi(api_url, timeout=120, wait_events=2.0) + api.login(username, password) + + monitors = api.get_monitors() + notifications = api.get_notifications() + + ntfy_notification = next((n for n in notifications if n.get('name') == f'ntfy ({ntfy_topic})'), None) + notification_id_list = {} + if ntfy_notification: + notification_id_list[ntfy_notification['id']] = True + + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + if not group: + api.add_monitor(type='group', name=group_name) + monitors = api.get_monitors() + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + + existing_monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + monitor_data = { + 'type': 'push', + 'name': monitor_name, + 'parent': group['id'], + 'interval': interval, + 'upsideDown': False, # Normal heartbeat mode: receiving pings = healthy + 'maxretries': retries, + 'description': monitor_description, + 'notificationIDList': notification_id_list + } + + if existing_monitor: + api.edit_monitor(existing_monitor['id'], **monitor_data) + monitors = api.get_monitors() + monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + else: + api.add_monitor(**monitor_data) + monitors = api.get_monitors() + monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + result = { + 'monitor_id': monitor['id'], + 'push_token': monitor['pushToken'], + 'group_name': group_name, + 'group_id': group['id'], + 'monitor_name': monitor_name + } + print(json.dumps(result)) + + api.disconnect() + + if __name__ == '__main__': + main() + mode: '0755' + delegate_to: localhost + become: no + + - name: Run Uptime Kuma UPS monitor setup script + command: > + {{ ansible_playbook_python }} + /tmp/setup_uptime_kuma_ups_monitor.py + "{{ uptime_kuma_api_url }}" + "{{ uptime_kuma_username }}" + "{{ uptime_kuma_password }}" + "{{ 
uptime_kuma_monitor_group }}" + "{{ monitor_name }}" + "{{ monitor_friendly_name }} - Alerts when UPS goes on battery or loses communication" + "{{ ups_heartbeat_timeout_seconds }}" + "{{ ups_heartbeat_retries }}" + "{{ ntfy_topic }}" + register: monitor_setup_result + delegate_to: localhost + become: no + changed_when: false + + - name: Parse monitor setup result + set_fact: + monitor_info_parsed: "{{ monitor_setup_result.stdout | from_json }}" + + - name: Set push URL as fact + set_fact: + uptime_kuma_ups_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}" + + - name: Install required packages for UPS monitoring + package: + name: + - curl + state: present + + - name: Create monitoring script directory + file: + path: "{{ ups_monitoring_script_dir }}" + state: directory + owner: root + group: root + mode: '0755' + + - name: Create UPS heartbeat monitoring script + copy: + dest: "{{ ups_monitoring_script_path }}" + content: | + #!/bin/bash + + # UPS Heartbeat Monitoring Script + # Sends heartbeat to Uptime Kuma only when UPS is on mains power + # When on battery or communication lost, no heartbeat is sent (triggers timeout alert) + + LOG_FILE="{{ ups_log_file }}" + UPTIME_KUMA_URL="{{ uptime_kuma_ups_push_url }}" + UPS_NAME="{{ ups_name }}" + + log_message() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + send_heartbeat() { + local message="$1" + + # Percent-encode for the push URL query string. IMPORTANT: '%' must be + # escaped FIRST, otherwise the '%' introduced by the later substitutions + # ('%20', '%28', ...) gets double-encoded into '%2520' etc. + local encoded_message + encoded_message=$(printf '%s\n' "$message" | sed 's/%/%25/g; s/ /%20/g; s/(/%28/g; s/)/%29/g; s/:/%3A/g; s/\//%2F/g') + + local response http_code + response=$(curl -s -w "\n%{http_code}" "$UPTIME_KUMA_URL?status=up&msg=$encoded_message" 2>&1) + http_code=$(echo "$response" | tail -n1) + + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + log_message "Heartbeat sent: $message (HTTP $http_code)" + return 0 + else + log_message "ERROR: Failed to send heartbeat (HTTP $http_code)" + return 1 + fi + } + + main() { + local status charge 
runtime load + + status=$(upsc ${UPS_NAME}@localhost ups.status 2>/dev/null) + + if [ -z "$status" ]; then + log_message "ERROR: Cannot communicate with UPS - NOT sending heartbeat" + exit 1 + fi + + charge=$(upsc ${UPS_NAME}@localhost battery.charge 2>/dev/null) + runtime=$(upsc ${UPS_NAME}@localhost battery.runtime 2>/dev/null) + load=$(upsc ${UPS_NAME}@localhost ups.load 2>/dev/null) + + if [[ "$status" == *"OL"* ]]; then + local message="UPS on mains (charge=${charge}% runtime=${runtime}s load=${load}%)" + send_heartbeat "$message" + exit 0 + else + log_message "UPS not on mains power (status=$status) - NOT sending heartbeat" + exit 1 + fi + } + + main + owner: root + group: root + mode: '0755' + + - name: Create systemd service for UPS heartbeat + copy: + dest: "/etc/systemd/system/{{ ups_systemd_service_name }}.service" + content: | + [Unit] + Description=UPS Heartbeat Monitor + After=network.target nut-monitor.service + + [Service] + Type=oneshot + ExecStart={{ ups_monitoring_script_path }} + User=root + StandardOutput=journal + StandardError=journal + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + - name: Create systemd timer for UPS heartbeat + copy: + dest: "/etc/systemd/system/{{ ups_systemd_service_name }}.timer" + content: | + [Unit] + Description=Run UPS Heartbeat Monitor every {{ ups_heartbeat_interval_seconds }} seconds + Requires={{ ups_systemd_service_name }}.service + + [Timer] + OnBootSec=1min + OnUnitActiveSec={{ ups_heartbeat_interval_seconds }}sec + Persistent=true + + [Install] + WantedBy=timers.target + owner: root + group: root + mode: '0644' + + - name: Reload systemd daemon + systemd: + daemon_reload: yes + + - name: Enable and start UPS heartbeat timer + systemd: + name: "{{ ups_systemd_service_name }}.timer" + enabled: yes + state: started + + - name: Test UPS heartbeat script + command: "{{ ups_monitoring_script_path }}" + register: script_test + changed_when: false + + - name: Verify script 
execution + assert: + that: + - script_test.rc == 0 + fail_msg: "UPS heartbeat script failed - check UPS status and communication" + + - name: Display monitoring configuration + debug: + msg: + - "UPS Monitoring configured successfully" + - "" + - "NUT Configuration:" + - " UPS Name: {{ ups_name }}" + - " UPS Description: {{ ups_desc }}" + - " Off Delay: {{ ups_offdelay }}s (time after shutdown before UPS cuts power)" + - " On Delay: {{ ups_ondelay }}s (time after mains returns before UPS restores power)" + - "" + - "Uptime Kuma Monitoring:" + - " Monitor Name: {{ monitor_friendly_name }}" + - " Monitor Group: {{ uptime_kuma_monitor_group }}" + - " Push URL: {{ uptime_kuma_ups_push_url }}" + - " Heartbeat Interval: {{ ups_heartbeat_interval_seconds }}s" + - " Timeout: {{ ups_heartbeat_timeout_seconds }}s" + - "" + - "Scripts and Services:" + - " Script: {{ ups_monitoring_script_path }}" + - " Log: {{ ups_log_file }}" + - " Service: {{ ups_systemd_service_name }}.service" + - " Timer: {{ ups_systemd_service_name }}.timer" + + - name: Clean up temporary Uptime Kuma setup script + file: + path: /tmp/setup_uptime_kuma_ups_monitor.py + state: absent + delegate_to: localhost + become: no diff --git a/ansible/infra/nodito/nodito_vars.yml b/ansible/infra/nodito/nodito_vars.yml index f9e6b0d..c0002f3 100644 --- a/ansible/infra/nodito/nodito_vars.yml +++ b/ansible/infra/nodito/nodito_vars.yml @@ -17,3 +17,12 @@ zfs_pool_name: "proxmox-tank-1" zfs_disk_1: "/dev/disk/by-id/ata-ST4000NT001-3M2101_WX11TN0Z" # First disk for RAID 1 mirror zfs_disk_2: "/dev/disk/by-id/ata-ST4000NT001-3M2101_WX11TN2P" # Second disk for RAID 1 mirror zfs_pool_mountpoint: "/var/lib/vz" + +# UPS Configuration (CyberPower CP900EPFCLCD via USB) +ups_name: cyberpower +ups_desc: "CyberPower CP900EPFCLCD" +ups_driver: usbhid-ups +ups_port: auto +ups_user: counterweight +ups_offdelay: 120 # Seconds after shutdown before UPS cuts outlet power +ups_ondelay: 30 # Seconds after mains returns before UPS restores 
outlet power diff --git a/ansible/infra_secrets.yml.example b/ansible/infra_secrets.yml.example index cddc58a..14fd498 100644 --- a/ansible/infra_secrets.yml.example +++ b/ansible/infra_secrets.yml.example @@ -26,3 +26,8 @@ bitcoin_rpc_password: "CHANGE_ME_TO_SECURE_PASSWORD" # Mempool MariaDB credentials # Used by: services/mempool/deploy_mempool_playbook.yml mariadb_mempool_password: "CHANGE_ME_TO_SECURE_PASSWORD" + +# Forgejo Runner registration token +# Used by: services/forgejo-runner/deploy_forgejo_runner_playbook.yml +# See: services/forgejo-runner/SETUP.md for how to obtain this token +forgejo_runner_registration_token: "YOUR_RUNNER_TOKEN_HERE" diff --git a/ansible/services/forgejo-runner/SETUP.md b/ansible/services/forgejo-runner/SETUP.md new file mode 100644 index 0000000..a66d295 --- /dev/null +++ b/ansible/services/forgejo-runner/SETUP.md @@ -0,0 +1,28 @@ +# Forgejo Runner Setup + +## Obtaining the Registration Token + +1. Log in to the Forgejo instance at `https://forgejo.contrapeso.xyz` +2. Go to **Site Administration** > **Actions** > **Runners** +3. Click **Create new runner** +4. Copy the registration token + +## Configuring the Token + +Paste the token into `ansible/infra_secrets.yml`: + +```yaml +forgejo_runner_registration_token: "YOUR_TOKEN_HERE" +``` + +## Running the Playbook + +```bash +ansible-playbook ansible/services/forgejo-runner/deploy_forgejo_runner_playbook.yml +``` + +## Verifying + +1. On the VM: `systemctl status forgejo-runner` should show active +2. In Forgejo: **Site Administration** > **Actions** > **Runners** should show the runner as online +3. 
In Uptime Kuma: the `forgejo-runner-healthcheck` push monitor should be receiving pings diff --git a/ansible/services/forgejo-runner/deploy_forgejo_runner_playbook.yml b/ansible/services/forgejo-runner/deploy_forgejo_runner_playbook.yml new file mode 100644 index 0000000..a194178 --- /dev/null +++ b/ansible/services/forgejo-runner/deploy_forgejo_runner_playbook.yml @@ -0,0 +1,392 @@ +- name: Install Forgejo Runner on Debian 13 + hosts: forgejo_runner_local + become: yes + vars_files: + - ../../infra_vars.yml + - ../../services_config.yml + - ../../infra_secrets.yml + - ./forgejo_runner_vars.yml + vars: + uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" + healthcheck_interval_seconds: 60 + healthcheck_timeout_seconds: 90 + healthcheck_retries: 1 + healthcheck_script_dir: /opt/forgejo-runner-healthcheck + healthcheck_script_path: "{{ healthcheck_script_dir }}/forgejo_runner_healthcheck.sh" + healthcheck_log_file: "{{ healthcheck_script_dir }}/forgejo_runner_healthcheck.log" + healthcheck_service_name: forgejo-runner-healthcheck + + tasks: + # ── 1. Assert Docker is available ────────────────────────────────── + - name: Check if Docker is installed + command: docker --version + register: docker_check + changed_when: false + failed_when: docker_check.rc != 0 + + - name: Fail if Docker is not available + assert: + that: + - docker_check.rc == 0 + fail_msg: > + Docker is not installed or not in PATH. + Please install Docker before running this playbook. + + # ── 2. Download forgejo-runner binary ────────────────────────────── + - name: Download forgejo-runner binary + get_url: + url: "{{ forgejo_runner_url }}" + dest: "{{ forgejo_runner_bin_path }}" + mode: '0755' + + # ── 3. 
Create runner system user ─────────────────────────────────── + - name: Create runner system user + user: + name: "{{ forgejo_runner_user }}" + system: yes + shell: /usr/sbin/nologin + home: "{{ forgejo_runner_dir }}" + create_home: no + groups: docker + append: yes + comment: 'Forgejo Runner' + + # ── 4. Create working directory ──────────────────────────────────── + - name: Create forgejo-runner working directory + file: + path: "{{ forgejo_runner_dir }}" + state: directory + owner: "{{ forgejo_runner_user }}" + group: "{{ forgejo_runner_user }}" + mode: '0750' + + # ── 5. Generate default config ───────────────────────────────────── + - name: Check if config already exists + stat: + path: "{{ forgejo_runner_config_path }}" + register: config_stat + + - name: Generate default config + shell: "{{ forgejo_runner_bin_path }} generate-config > {{ forgejo_runner_config_path }}" + args: + chdir: "{{ forgejo_runner_dir }}" + when: not config_stat.stat.exists + + - name: Set config file ownership + file: + path: "{{ forgejo_runner_config_path }}" + owner: "{{ forgejo_runner_user }}" + group: "{{ forgejo_runner_user }}" + when: not config_stat.stat.exists + + # ── 6. Register runner ───────────────────────────────────────────── + - name: Check if runner is already registered + stat: + path: "{{ forgejo_runner_dir }}/.runner" + register: runner_stat + + - name: Register runner with Forgejo instance + command: > + {{ forgejo_runner_bin_path }} register --no-interactive + --instance {{ forgejo_instance_url }} + --token {{ forgejo_runner_registration_token }} + --name forgejo-runner-box + --labels "{{ forgejo_runner_labels }}" + args: + chdir: "{{ forgejo_runner_dir }}" + when: not runner_stat.stat.exists + + - name: Set runner registration file ownership + file: + path: "{{ forgejo_runner_dir }}/.runner" + owner: "{{ forgejo_runner_user }}" + group: "{{ forgejo_runner_user }}" + when: not runner_stat.stat.exists + + # ── 7. 
Create systemd service ────────────────────────────────────── + - name: Create forgejo-runner systemd service + copy: + dest: /etc/systemd/system/forgejo-runner.service + content: | + [Unit] + Description=Forgejo Runner + Documentation=https://forgejo.org/docs/latest/admin/actions/ + After=docker.service + Requires=docker.service + + [Service] + Type=simple + User={{ forgejo_runner_user }} + Group={{ forgejo_runner_user }} + WorkingDirectory={{ forgejo_runner_dir }} + ExecStart={{ forgejo_runner_bin_path }} daemon --config {{ forgejo_runner_config_path }} + Restart=on-failure + RestartSec=10 + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + # ── 8. Reload systemd, enable and start ──────────────────────────── + - name: Reload systemd + systemd: + daemon_reload: yes + + - name: Enable and start forgejo-runner service + systemd: + name: forgejo-runner + enabled: yes + state: started + + # ── 9. Verify runner is active ───────────────────────────────────── + - name: Verify forgejo-runner is active + command: systemctl is-active forgejo-runner + register: runner_active + changed_when: false + + - name: Assert runner is running + assert: + that: + - runner_active.stdout == "active" + fail_msg: "forgejo-runner service is not active: {{ runner_active.stdout }}" + + # ── 10. 
Set up Uptime Kuma push monitor ──────────────────────────── + - name: Create Uptime Kuma push monitor setup script + copy: + dest: /tmp/setup_forgejo_runner_monitor.py + content: | + #!/usr/bin/env python3 + import sys + import json + from uptime_kuma_api import UptimeKumaApi + + def main(): + api_url = sys.argv[1] + username = sys.argv[2] + password = sys.argv[3] + group_name = sys.argv[4] + monitor_name = sys.argv[5] + monitor_description = sys.argv[6] + interval = int(sys.argv[7]) + retries = int(sys.argv[8]) + ntfy_topic = sys.argv[9] if len(sys.argv) > 9 else "alerts" + + api = UptimeKumaApi(api_url, timeout=60, wait_events=2.0) + api.login(username, password) + + # Get all monitors + monitors = api.get_monitors() + + # Get all notifications and find ntfy notification + notifications = api.get_notifications() + ntfy_notification = next((n for n in notifications if n.get('name') == f'ntfy ({ntfy_topic})'), None) + notification_id_list = {} + if ntfy_notification: + notification_id_list[ntfy_notification['id']] = True + + # Find or create group + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + if not group: + group_result = api.add_monitor(type='group', name=group_name) + # Refresh to get the full group object with id + monitors = api.get_monitors() + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + + # Find or create/update push monitor + existing_monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + monitor_data = { + 'type': 'push', + 'name': monitor_name, + 'parent': group['id'], + 'interval': interval, + 'upsideDown': False, + 'maxretries': retries, + 'description': monitor_description, + 'notificationIDList': notification_id_list + } + + if existing_monitor: + monitor = api.edit_monitor(existing_monitor['id'], **monitor_data) + # Refresh to get the full monitor object with pushToken + monitors = api.get_monitors() + 
monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + else: + monitor_result = api.add_monitor(**monitor_data) + # Refresh to get the full monitor object with pushToken + monitors = api.get_monitors() + monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + result = { + 'monitor_id': monitor['id'], + 'push_token': monitor['pushToken'], + 'group_name': group_name, + 'group_id': group['id'], + 'monitor_name': monitor_name + } + print(json.dumps(result)) + + api.disconnect() + + if __name__ == '__main__': + main() + mode: '0755' + delegate_to: localhost + become: no + + - name: Run Uptime Kuma push monitor setup + command: > + {{ ansible_playbook_python }} + /tmp/setup_forgejo_runner_monitor.py + "{{ uptime_kuma_api_url }}" + "{{ uptime_kuma_username }}" + "{{ uptime_kuma_password }}" + "services" + "forgejo-runner-healthcheck" + "Forgejo Runner healthcheck - ping every {{ healthcheck_interval_seconds }}s" + "{{ healthcheck_timeout_seconds }}" + "{{ healthcheck_retries }}" + "{{ ntfy_topic }}" + register: monitor_setup_result + delegate_to: localhost + become: no + changed_when: false + + - name: Parse monitor setup result + set_fact: + monitor_info_parsed: "{{ monitor_setup_result.stdout | from_json }}" + + - name: Set push URL + set_fact: + uptime_kuma_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}" + + - name: Create healthcheck script directory + file: + path: "{{ healthcheck_script_dir }}" + state: directory + owner: root + group: root + mode: '0755' + + - name: Create forgejo-runner healthcheck script + copy: + dest: "{{ healthcheck_script_path }}" + content: | + #!/bin/bash + + # Forgejo Runner Healthcheck Script + # Checks if forgejo-runner is active and pings Uptime Kuma on success + + LOG_FILE="{{ healthcheck_log_file }}" + UPTIME_KUMA_URL="{{ uptime_kuma_push_url }}" + + log_message() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + main() { + if 
systemctl is-active --quiet forgejo-runner; then + log_message "forgejo-runner is active, sending ping" + response=$(curl -s -w "\n%{http_code}" "$UPTIME_KUMA_URL?status=up&msg=forgejo-runner%20is%20active" 2>&1) + http_code=$(echo "$response" | tail -n1) + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + log_message "Ping sent successfully (HTTP $http_code)" + else + log_message "ERROR: Failed to send ping (HTTP $http_code)" + exit 1 + fi + else + log_message "ERROR: forgejo-runner is not active" + exit 1 + fi + } + + main + owner: root + group: root + mode: '0755' + + - name: Create healthcheck systemd service + copy: + dest: "/etc/systemd/system/{{ healthcheck_service_name }}.service" + content: | + [Unit] + Description=Forgejo Runner Healthcheck + After=network.target + + [Service] + Type=oneshot + ExecStart={{ healthcheck_script_path }} + User=root + StandardOutput=journal + StandardError=journal + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + - name: Create healthcheck systemd timer + copy: + dest: "/etc/systemd/system/{{ healthcheck_service_name }}.timer" + content: | + [Unit] + Description=Run Forgejo Runner Healthcheck every minute + Requires={{ healthcheck_service_name }}.service + + [Timer] + OnBootSec=30sec + OnUnitActiveSec={{ healthcheck_interval_seconds }}sec + Persistent=true + + [Install] + WantedBy=timers.target + owner: root + group: root + mode: '0644' + + - name: Reload systemd for healthcheck units + systemd: + daemon_reload: yes + + - name: Enable and start healthcheck timer + systemd: + name: "{{ healthcheck_service_name }}.timer" + enabled: yes + state: started + + - name: Test healthcheck script + command: "{{ healthcheck_script_path }}" + register: healthcheck_test + changed_when: false + + - name: Verify healthcheck script works + assert: + that: + - healthcheck_test.rc == 0 + fail_msg: "Healthcheck script failed to execute properly" + + - name: Display deployment summary + debug: 
+ msg: | + Forgejo Runner deployed successfully! + + Runner Name: forgejo-runner-box + Instance: {{ forgejo_instance_url }} + Working Directory: {{ forgejo_runner_dir }} + Service: forgejo-runner.service ({{ runner_active.stdout }}) + + Healthcheck Monitor: forgejo-runner-healthcheck + Healthcheck Interval: Every {{ healthcheck_interval_seconds }}s + Timeout: {{ healthcheck_timeout_seconds }}s + + - name: Clean up temporary monitor setup script + file: + path: /tmp/setup_forgejo_runner_monitor.py + state: absent + delegate_to: localhost + become: no diff --git a/ansible/services/forgejo-runner/forgejo_runner_vars.yml b/ansible/services/forgejo-runner/forgejo_runner_vars.yml new file mode 100644 index 0000000..e618fca --- /dev/null +++ b/ansible/services/forgejo-runner/forgejo_runner_vars.yml @@ -0,0 +1,9 @@ +forgejo_runner_version: "6.3.1" +forgejo_runner_arch: "linux-amd64" +forgejo_runner_url: "https://code.forgejo.org/forgejo/runner/releases/download/v{{ forgejo_runner_version }}/forgejo-runner-{{ forgejo_runner_version }}-{{ forgejo_runner_arch }}" +forgejo_runner_bin_path: "/usr/local/bin/forgejo-runner" +forgejo_runner_user: "runner" +forgejo_runner_dir: "/opt/forgejo-runner" +forgejo_runner_config_path: "{{ forgejo_runner_dir }}/config.yml" +forgejo_runner_labels: "docker:docker://node:20-bookworm,ubuntu-latest:docker://node:20-bookworm,ubuntu-22.04:docker://node:20-bookworm,ubuntu-24.04:docker://node:20-bookworm" +forgejo_instance_url: "https://forgejo.contrapeso.xyz"