---
# Deploy Disk Usage Monitoring
#
# For every targeted host this play:
#   1. (on the controller) creates or updates an Uptime Kuma *push* monitor,
#      placed in a per-host group, and reads back its push token;
#   2. (on the host) installs a bash script that checks the usage of
#      `monitored_mount_point` and pushes to that monitor only when usage is
#      above `disk_usage_threshold_percent`;
#   3. (on the host) installs and starts a systemd service + timer that runs
#      the script every `disk_check_interval_minutes` minutes.
#
# The controller-side helper imports `uptime_kuma_api`, so that package must
# be importable by the Python interpreter that runs ansible-playbook.
- name: Deploy Disk Usage Monitoring
  hosts: all
  become: true
  vars_files:
    - ../infra_vars.yml
    - ../services_config.yml
    - ../infra_secrets.yml
    - ../services/uptime_kuma/uptime_kuma_vars.yml
    - ../services/ntfy/ntfy_vars.yml

  vars:
    disk_usage_threshold_percent: 80
    disk_check_interval_minutes: 15
    monitored_mount_point: "/"
    monitoring_script_dir: /opt/disk-monitoring
    monitoring_script_path: "{{ monitoring_script_dir }}/disk_usage_monitor.sh"
    log_file: "{{ monitoring_script_dir }}/disk_usage_monitor.log"
    systemd_service_name: disk-usage-monitor

    # Uptime Kuma configuration (auto-configured from services_config.yml and infra_secrets.yml)
    uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}"

  tasks:
    - name: Validate Uptime Kuma configuration
      assert:
        that:
          - uptime_kuma_api_url is defined
          - uptime_kuma_api_url != ""
          - uptime_kuma_username is defined
          - uptime_kuma_username != ""
          - uptime_kuma_password is defined
          - uptime_kuma_password != ""
        fail_msg: "uptime_kuma_api_url, uptime_kuma_username and uptime_kuma_password must be set"

    - name: Get hostname for monitor identification
      command: hostname
      register: host_name
      changed_when: false

    # With the default mount point "/", the monitor name ends in "-root".
    - name: Set monitor name and group based on hostname and mount point
      set_fact:
        monitor_name: "disk-usage-{{ host_name.stdout }}-{{ monitored_mount_point | replace('/', 'root') }}"
        monitor_friendly_name: "Disk Usage: {{ host_name.stdout }} ({{ monitored_mount_point }})"
        uptime_kuma_monitor_group: "{{ host_name.stdout }} - infra"

    # The helper is written to the controller (delegate_to: localhost) and is
    # removed again by the last task of this play.
    - name: Create Uptime Kuma monitor setup script
      copy:
        dest: /tmp/setup_uptime_kuma_monitor.py
        content: |
          #!/usr/bin/env python3
          """Create or update the Uptime Kuma push monitor for one host.

          Usage:
              setup_uptime_kuma_monitor.py API_URL USERNAME GROUP_NAME \
                  MONITOR_NAME DESCRIPTION INTERVAL_SECONDS [NTFY_TOPIC]

          The password is read from the UPTIME_KUMA_PASSWORD environment
          variable so that it never appears on the command line.

          On success a JSON object with monitor_id, push_token, group_name,
          group_id and monitor_name is printed to stdout.
          """
          import json
          import os
          import sys
          from uptime_kuma_api import UptimeKumaApi


          def find_monitor(monitors, name, monitor_type=None):
              """Return the first monitor called `name` (optionally of `monitor_type`), or None."""
              for candidate in monitors:
                  if candidate.get('name') != name:
                      continue
                  if monitor_type is not None and candidate.get('type') != monitor_type:
                      continue
                  return candidate
              return None


          def main():
              if len(sys.argv) < 7:
                  sys.exit(
                      "usage: setup_uptime_kuma_monitor.py API_URL USERNAME GROUP_NAME "
                      "MONITOR_NAME DESCRIPTION INTERVAL_SECONDS [NTFY_TOPIC]"
                  )

              api_url = sys.argv[1]
              username = sys.argv[2]
              group_name = sys.argv[3]
              monitor_name = sys.argv[4]
              monitor_description = sys.argv[5]
              interval = int(sys.argv[6])
              ntfy_topic = sys.argv[7] if len(sys.argv) > 7 else "alerts"

              password = os.environ.get('UPTIME_KUMA_PASSWORD', '')
              if not password:
                  sys.exit("UPTIME_KUMA_PASSWORD environment variable is not set")

              api = UptimeKumaApi(api_url, timeout=60, wait_events=2.0)
              try:
                  api.login(username, password)

                  # Get all monitors
                  monitors = api.get_monitors()

                  # Get all notifications and find ntfy notification
                  notifications = api.get_notifications()
                  ntfy_notification = next(
                      (n for n in notifications if n.get('name') == f'ntfy ({ntfy_topic})'),
                      None,
                  )
                  notification_id_list = {}
                  if ntfy_notification:
                      notification_id_list[ntfy_notification['id']] = True

                  # Find or create group
                  group = find_monitor(monitors, group_name, 'group')
                  if not group:
                      api.add_monitor(type='group', name=group_name)
                      # Refresh to get the full group object with id
                      monitors = api.get_monitors()
                      group = find_monitor(monitors, group_name, 'group')
                  if not group:
                      sys.exit(f"Group '{group_name}' could not be found after creating it")

                  # Find or create/update push monitor
                  existing_monitor = find_monitor(monitors, monitor_name)

                  monitor_data = {
                      'type': 'push',
                      'name': monitor_name,
                      'parent': group['id'],
                      'interval': interval,
                      'upsideDown': True,
                      'description': monitor_description,
                      'notificationIDList': notification_id_list
                  }

                  if existing_monitor:
                      api.edit_monitor(existing_monitor['id'], **monitor_data)
                  else:
                      api.add_monitor(**monitor_data)

                  # Refresh to get the full monitor object with pushToken
                  monitors = api.get_monitors()
                  monitor = find_monitor(monitors, monitor_name)
                  if not monitor:
                      sys.exit(f"Monitor '{monitor_name}' could not be found after saving it")

                  # Output result as JSON
                  result = {
                      'monitor_id': monitor['id'],
                      'push_token': monitor['pushToken'],
                      'group_name': group_name,
                      'group_id': group['id'],
                      'monitor_name': monitor_name
                  }
                  print(json.dumps(result))
              finally:
                  # Always close the connection, even when a step above failed.
                  api.disconnect()


          if __name__ == '__main__':
              main()
        mode: '0755'
      delegate_to: localhost
      become: false

    # argv (list form) passes every value as exactly one argument, so spaces
    # or quotes inside a value cannot split or merge arguments.
    # The password goes through the environment instead of argv so that it is
    # not part of the command line.
    # The monitor interval is the check interval in seconds plus 60.
    - name: Run Uptime Kuma monitor setup script
      command:
        argv:
          - "{{ ansible_playbook_python }}"
          - /tmp/setup_uptime_kuma_monitor.py
          - "{{ uptime_kuma_api_url }}"
          - "{{ uptime_kuma_username }}"
          - "{{ uptime_kuma_monitor_group }}"
          - "{{ monitor_name }}"
          - "{{ monitor_friendly_name }} - Alerts when usage exceeds {{ disk_usage_threshold_percent }}%"
          - "{{ (disk_check_interval_minutes * 60) + 60 }}"
          - "{{ ntfy_topic }}"
      environment:
        UPTIME_KUMA_PASSWORD: "{{ uptime_kuma_password }}"
      register: monitor_setup_result
      delegate_to: localhost
      become: false
      changed_when: false

    - name: Parse monitor setup result
      set_fact:
        monitor_info_parsed: "{{ monitor_setup_result.stdout | from_json }}"

    - name: Set push URL and monitor ID as facts
      set_fact:
        uptime_kuma_disk_usage_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}"
        uptime_kuma_monitor_id: "{{ monitor_info_parsed.monitor_id }}"

    - name: Install required packages for disk monitoring
      package:
        name:
          - curl
        state: present

    - name: Create monitoring script directory
      file:
        path: "{{ monitoring_script_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: Create disk usage monitoring script
      copy:
        dest: "{{ monitoring_script_path }}"
        content: |
          #!/bin/bash

          # Disk Usage Monitoring Script
          # Monitors disk usage and sends alerts to Uptime Kuma
          # Mode: "No news is good news" - only sends alerts when disk usage is HIGH

          LOG_FILE="{{ log_file }}"
          USAGE_THRESHOLD="{{ disk_usage_threshold_percent }}"
          UPTIME_KUMA_URL="{{ uptime_kuma_disk_usage_push_url }}"
          MOUNT_POINT="{{ monitored_mount_point }}"

          # Function to log messages
          log_message() {
              echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
          }

          # Function to get disk usage percentage
          # Prints the integer percentage (without % sign); returns 1 if it cannot be read
          get_disk_usage() {
              local mount_point="$1"
              local usage=""

              # -P keeps each filesystem on a single output line, so row 2 / column 5
              # is always the capacity even when the device name is long
              usage=$(df -P "$mount_point" 2>/dev/null | awk 'NR==2 {gsub(/%/, "", $5); print $5}')

              # Reject empty or non-numeric values before they reach the comparison
              case "$usage" in
                  ''|*[!0-9]*)
                      log_message "ERROR: Could not read disk usage for $mount_point"
                      return 1
                      ;;
              esac

              echo "$usage"
          }

          # Function to get disk usage details
          get_disk_details() {
              local mount_point="$1"
              df -hP "$mount_point" 2>/dev/null | awk 'NR==2 {print "Used: "$3" / Total: "$2" ("$5" full)"}'
          }

          # Function to send alert to Uptime Kuma when disk usage exceeds threshold
          # With upside-down mode enabled, sending status=up will trigger an alert
          send_uptime_kuma_alert() {
              local usage="$1"
              local details="$2"
              local message="DISK FULL WARNING: ${MOUNT_POINT} is ${usage}% full (Threshold: ${USAGE_THRESHOLD}%) - ${details}"
              local response=""
              local http_code=""

              log_message "ALERT: $message"

              # Send push notification to Uptime Kuma with status=up
              # In upside-down mode, status=up is treated as down/alert
              # --max-time stops a hung request from blocking the service run
              response=$(curl -s --max-time 30 -w "\n%{http_code}" -G \
                  --data-urlencode "status=up" \
                  --data-urlencode "msg=$message" \
                  "$UPTIME_KUMA_URL" 2>&1)
              http_code=$(echo "$response" | tail -n1)

              if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
                  log_message "Alert sent successfully to Uptime Kuma (HTTP $http_code)"
              else
                  log_message "ERROR: Failed to send alert to Uptime Kuma (HTTP $http_code)"
              fi
          }

          # Main monitoring logic
          main() {
              log_message "Starting disk usage check for $MOUNT_POINT"

              # Get current disk usage
              if ! current_usage=$(get_disk_usage "$MOUNT_POINT") || [ -z "$current_usage" ]; then
                  log_message "ERROR: Could not read disk usage"
                  exit 1
              fi

              # Get disk details
              disk_details=$(get_disk_details "$MOUNT_POINT")

              log_message "Current disk usage: ${current_usage}% - $disk_details"

              # Check if usage exceeds threshold
              if [ "$current_usage" -gt "$USAGE_THRESHOLD" ]; then
                  log_message "WARNING: Disk usage ${current_usage}% exceeds threshold ${USAGE_THRESHOLD}%"
                  send_uptime_kuma_alert "$current_usage" "$disk_details"
              else
                  log_message "Disk usage is within normal range - no alert needed (no news is good news)"
              fi
          }

          # Run main function
          main
        owner: root
        group: root
        mode: '0755'

    - name: Create systemd service for disk usage monitoring
      copy:
        dest: "/etc/systemd/system/{{ systemd_service_name }}.service"
        content: |
          [Unit]
          Description=Disk Usage Monitor
          After=network.target

          [Service]
          Type=oneshot
          ExecStart={{ monitoring_script_path }}
          User=root
          StandardOutput=journal
          StandardError=journal

          [Install]
          WantedBy=multi-user.target
        owner: root
        group: root
        mode: '0644'

    - name: Create systemd timer for disk usage monitoring
      copy:
        dest: "/etc/systemd/system/{{ systemd_service_name }}.timer"
        content: |
          [Unit]
          Description=Run Disk Usage Monitor every {{ disk_check_interval_minutes }} minute(s)
          Requires={{ systemd_service_name }}.service

          [Timer]
          OnBootSec={{ disk_check_interval_minutes }}min
          OnUnitActiveSec={{ disk_check_interval_minutes }}min
          Persistent=true

          [Install]
          WantedBy=timers.target
        owner: root
        group: root
        mode: '0644'

    - name: Reload systemd daemon
      systemd:
        daemon_reload: true

    - name: Enable and start disk usage monitoring timer
      systemd:
        name: "{{ systemd_service_name }}.timer"
        enabled: true
        state: started

    # Runs the deployed script once; if usage is currently above the
    # threshold this run pushes a real alert.
    - name: Test disk usage monitoring script
      command: "{{ monitoring_script_path }}"
      register: script_test
      changed_when: false

    - name: Verify script execution
      assert:
        that:
          - script_test.rc == 0
        fail_msg: "Disk usage monitoring script failed to execute properly"

    - name: Clean up temporary Uptime Kuma setup script
      file:
        path: /tmp/setup_uptime_kuma_monitor.py
        state: absent
      delegate_to: localhost
      become: false