# Play: deploy a CPU temperature watchdog.
#
# 1. On the control node, create (or update) a push monitor in Uptime Kuma
#    through a temporary Python helper and capture its push token.
# 2. On the target host, install a Bash script plus a systemd service/timer
#    that reads the CPU temperature every `temp_check_interval_minutes` and
#    calls the push URL only when `temp_threshold_celsius` is exceeded.
#
# Expects `subdomains`, `root_domain`, `service_settings`,
# `uptime_kuma_username` and `uptime_kuma_password` to come from the
# vars_files listed below (not visible here).
- name: Deploy CPU Temperature Monitoring
  hosts: nodito_host
  become: true
  vars_files:
    - ../infra_vars.yml
    - ../services_config.yml
    - ../infra_secrets.yml
  vars:
    temp_threshold_celsius: 80
    temp_check_interval_minutes: 1
    monitoring_script_dir: /opt/nodito-monitoring
    monitoring_script_path: "{{ monitoring_script_dir }}/cpu_temp_monitor.sh"
    log_file: "{{ monitoring_script_dir }}/cpu_temp_monitor.log"
    systemd_service_name: nodito-cpu-temp-monitor
    uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}"
    ntfy_topic: "{{ service_settings.ntfy.topic }}"

  tasks:
    - name: Validate Uptime Kuma configuration
      assert:
        that:
          - uptime_kuma_api_url is defined
          - uptime_kuma_api_url != ""
          - uptime_kuma_username is defined
          - uptime_kuma_username != ""
          - uptime_kuma_password is defined
          - uptime_kuma_password != ""
        fail_msg: "uptime_kuma_api_url, uptime_kuma_username and uptime_kuma_password must be set"

    - name: Get hostname for monitor identification
      command: hostname
      register: host_name
      changed_when: false

    - name: Set monitor name and group based on hostname
      set_fact:
        monitor_name: "cpu-temp-{{ host_name.stdout }}"
        monitor_friendly_name: "CPU Temperature: {{ host_name.stdout }}"
        uptime_kuma_monitor_group: "{{ host_name.stdout }} - infra"

    # The helper is written to the control node and removed by the last task
    # of this play.
    - name: Create Uptime Kuma CPU temperature monitor setup script
      copy:
        dest: /tmp/setup_uptime_kuma_cpu_temp_monitor.py
        content: |
          #!/usr/bin/env python3
          """Create or update an Uptime Kuma push monitor and print its details.

          Positional arguments:
            1. Uptime Kuma base URL
            2. username
            3. monitor group name (created when missing)
            4. monitor name
            5. monitor description
            6. heartbeat interval in seconds
            7. ntfy topic (optional, defaults to "alerts")

          The password is read from the UPTIME_KUMA_PASSWORD environment
          variable so that it never appears on the command line.

          Prints a single JSON object on stdout; errors go to stderr.
          """
          import json
          import os
          import sys

          from uptime_kuma_api import UptimeKumaApi


          def find_group(monitors, group_name):
              """Return the group monitor called group_name, or None."""
              return next(
                  (m for m in monitors
                   if m.get('name') == group_name and m.get('type') == 'group'),
                  None,
              )


          def find_monitor(monitors, monitor_name):
              """Return the first monitor called monitor_name, or None."""
              return next((m for m in monitors if m.get('name') == monitor_name), None)


          def main():
              api_url = sys.argv[1]
              username = sys.argv[2]
              group_name = sys.argv[3]
              monitor_name = sys.argv[4]
              monitor_description = sys.argv[5]
              interval = int(sys.argv[6])
              ntfy_topic = sys.argv[7] if len(sys.argv) > 7 else "alerts"

              password = os.environ.get('UPTIME_KUMA_PASSWORD')
              if not password:
                  sys.exit("UPTIME_KUMA_PASSWORD environment variable is not set")

              api = UptimeKumaApi(api_url, timeout=60, wait_events=2.0)
              try:
                  api.login(username, password)

                  monitors = api.get_monitors()
                  notifications = api.get_notifications()

                  # Attach the ntfy notification only when one with the
                  # expected name exists; otherwise the list stays empty.
                  ntfy_notification = next(
                      (n for n in notifications
                       if n.get('name') == f'ntfy ({ntfy_topic})'),
                      None,
                  )
                  notification_id_list = {}
                  if ntfy_notification:
                      notification_id_list[ntfy_notification['id']] = True

                  group = find_group(monitors, group_name)
                  if not group:
                      api.add_monitor(type='group', name=group_name)
                      monitors = api.get_monitors()
                      group = find_group(monitors, group_name)
                  if not group:
                      sys.exit(f"Monitor group '{group_name}' could not be created")

                  existing_monitor = find_monitor(monitors, monitor_name)

                  # The target host pushes only when the temperature is too
                  # high. NOTE(review): upsideDown presumably makes a received
                  # push count as DOWN; confirm against Uptime Kuma's docs.
                  monitor_data = {
                      'type': 'push',
                      'name': monitor_name,
                      'parent': group['id'],
                      'interval': interval,
                      'upsideDown': True,
                      'description': monitor_description,
                      'notificationIDList': notification_id_list
                  }

                  if existing_monitor:
                      api.edit_monitor(existing_monitor['id'], **monitor_data)
                  else:
                      api.add_monitor(**monitor_data)

                  # Re-read the monitor list to obtain the id and push token.
                  monitors = api.get_monitors()
                  monitor = find_monitor(monitors, monitor_name)
                  if not monitor:
                      sys.exit(f"Monitor '{monitor_name}' not found after create/update")

                  result = {
                      'monitor_id': monitor['id'],
                      'push_token': monitor['pushToken'],
                      'group_name': group_name,
                      'group_id': group['id'],
                      'monitor_name': monitor_name
                  }
                  print(json.dumps(result))
              finally:
                  # Always close the connection, including on failure.
                  api.disconnect()


          if __name__ == '__main__':
              main()
        mode: '0755'
      delegate_to: localhost
      become: false

    # argv list form: each value is passed as exactly one argument, so quotes
    # or spaces inside a value cannot break argument splitting. The password
    # goes through the environment instead of the command line.
    - name: Run Uptime Kuma monitor setup script
      command:
        argv:
          - "{{ ansible_playbook_python }}"
          - /tmp/setup_uptime_kuma_cpu_temp_monitor.py
          - "{{ uptime_kuma_api_url }}"
          - "{{ uptime_kuma_username }}"
          - "{{ uptime_kuma_monitor_group }}"
          - "{{ monitor_name }}"
          - "{{ monitor_friendly_name }} - Alerts when temperature exceeds {{ temp_threshold_celsius }}°C"
          # Check interval in seconds plus a 60 second margin.
          - "{{ ((temp_check_interval_minutes | int) * 60 + 60) | string }}"
          - "{{ ntfy_topic }}"
      environment:
        UPTIME_KUMA_PASSWORD: "{{ uptime_kuma_password }}"
      register: monitor_setup_result
      delegate_to: localhost
      become: false
      changed_when: false

    - name: Parse monitor setup result
      set_fact:
        monitor_info_parsed: "{{ monitor_setup_result.stdout | from_json }}"

    - name: Set push URL and monitor ID as facts
      set_fact:
        uptime_kuma_cpu_temp_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}"
        uptime_kuma_monitor_id: "{{ monitor_info_parsed.monitor_id }}"

    - name: Install required packages for temperature monitoring
      package:
        name:
          - lm-sensors
          - curl
          - jq
          - bc
        state: present

    - name: Create monitoring script directory
      file:
        path: "{{ monitoring_script_dir }}"
        state: directory
        owner: root
        group: root
        mode: '0755'

    - name: Create CPU temperature monitoring script
      copy:
        dest: "{{ monitoring_script_path }}"
        content: |
          #!/bin/bash

          # CPU Temperature Monitoring Script
          # Monitors CPU temperature and sends alerts to Uptime Kuma

          LOG_FILE="{{ log_file }}"
          TEMP_THRESHOLD="{{ temp_threshold_celsius }}"
          UPTIME_KUMA_URL="{{ uptime_kuma_cpu_temp_push_url }}"

          # Append a timestamped line to the log file.
          log_message() {
              echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
          }

          # Print the current CPU temperature in degrees Celsius, or nothing
          # when no source is readable. Sources are tried in order: sensors,
          # the thermal_zone0 sysfs file, acpi.
          get_cpu_temp() {
              local temp=""

              if command -v sensors >/dev/null 2>&1; then
                  # A sensors line can carry several temperatures (current,
                  # high, crit). Keep only the first match so the result is a
                  # single number.
                  temp=$(sensors 2>/dev/null |
                      grep -E "Core 0|Package id 0|Tdie|Tctl" |
                      head -1 |
                      grep -oE '[0-9]+\.[0-9]+°C' |
                      head -1 |
                      grep -oE '[0-9]+\.[0-9]+')
              fi

              if [ -z "$temp" ] && [ -f /sys/class/thermal/thermal_zone0/temp ]; then
                  temp=$(cat /sys/class/thermal/thermal_zone0/temp)
                  # The raw value is divided by 1000; if bc fails the raw
                  # value is kept unchanged.
                  temp=$(echo "scale=1; $temp/1000" | bc -l 2>/dev/null || echo "$temp")
              fi

              if [ -z "$temp" ] && command -v acpi >/dev/null 2>&1; then
                  temp=$(acpi -t 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1)
              fi

              echo "$temp"
          }

          # Call the Uptime Kuma push URL with an alert message for the
          # temperature given as $1 and log the outcome.
          send_uptime_kuma_alert() {
              local temp="$1"
              local message="CPU Temperature Alert: ${temp}°C (Threshold: ${TEMP_THRESHOLD}°C)"

              log_message "ALERT: $message"

              # Percent-encode the characters the message is known to contain.
              encoded_message=$(printf '%s\n' "$message" | sed 's/ /%20/g; s/°/%C2%B0/g; s/(/%28/g; s/)/%29/g; s/:/%3A/g')

              # The HTTP status code is appended as the last line of output.
              response=$(curl -s -w "\n%{http_code}" "$UPTIME_KUMA_URL?status=up&msg=$encoded_message" 2>&1)
              http_code=$(echo "$response" | tail -n1)

              if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then
                  log_message "Alert sent successfully to Uptime Kuma (HTTP $http_code)"
              else
                  log_message "ERROR: Failed to send alert to Uptime Kuma (HTTP $http_code)"
              fi
          }

          # Read the temperature once, log it and alert when it is above the
          # threshold. Exits 1 when no temperature could be read.
          main() {
              log_message "Starting CPU temperature check"

              current_temp=$(get_cpu_temp)

              if [ -z "$current_temp" ]; then
                  log_message "ERROR: Could not read CPU temperature"
                  exit 1
              fi

              log_message "Current CPU temperature: ${current_temp}°C"

              # bc prints 1 when the comparison holds, 0 otherwise.
              if (( $(echo "$current_temp > $TEMP_THRESHOLD" | bc -l) )); then
                  log_message "WARNING: CPU temperature ${current_temp}°C exceeds threshold ${TEMP_THRESHOLD}°C"
                  send_uptime_kuma_alert "$current_temp"
              else
                  log_message "CPU temperature is within normal range"
              fi
          }

          main
        owner: root
        group: root
        mode: '0755'

    - name: Create systemd service for CPU temperature monitoring
      copy:
        dest: "/etc/systemd/system/{{ systemd_service_name }}.service"
        content: |
          [Unit]
          Description=CPU Temperature Monitor
          After=network.target

          [Service]
          Type=oneshot
          ExecStart={{ monitoring_script_path }}
          User=root
          StandardOutput=journal
          StandardError=journal

          [Install]
          WantedBy=multi-user.target
        owner: root
        group: root
        mode: '0644'

    - name: Create systemd timer for CPU temperature monitoring
      copy:
        dest: "/etc/systemd/system/{{ systemd_service_name }}.timer"
        content: |
          [Unit]
          Description=Run CPU Temperature Monitor every {{ temp_check_interval_minutes }} minute(s)
          Requires={{ systemd_service_name }}.service

          [Timer]
          OnBootSec={{ temp_check_interval_minutes }}min
          OnUnitActiveSec={{ temp_check_interval_minutes }}min
          Persistent=true

          [Install]
          WantedBy=timers.target
        owner: root
        group: root
        mode: '0644'

    - name: Reload systemd daemon
      systemd:
        daemon_reload: true

    - name: Enable and start CPU temperature monitoring timer
      systemd:
        name: "{{ systemd_service_name }}.timer"
        enabled: true
        state: started

    # Run the script once on the target to confirm it exits cleanly.
    - name: Test CPU temperature monitoring script
      command: "{{ monitoring_script_path }}"
      register: script_test
      changed_when: false

    - name: Verify script execution
      assert:
        that:
          - script_test.rc == 0
        fail_msg: "CPU temperature monitoring script failed to execute properly"

    - name: Display monitoring configuration
      debug:
        msg:
          - "CPU Temperature Monitoring configured successfully"
          - "Temperature threshold: {{ temp_threshold_celsius }}°C"
          - "Check interval: {{ temp_check_interval_minutes }} minute(s)"
          - "Monitor Name: {{ monitor_friendly_name }}"
          - "Monitor Group: {{ uptime_kuma_monitor_group }}"
          - "Uptime Kuma Push URL: {{ uptime_kuma_cpu_temp_push_url }}"
          - "Monitoring script: {{ monitoring_script_path }}"
          - "Systemd Service: {{ systemd_service_name }}.service"
          - "Systemd Timer: {{ systemd_service_name }}.timer"

    - name: Clean up temporary Uptime Kuma setup script
      file:
        path: /tmp/setup_uptime_kuma_cpu_temp_monitor.py
        state: absent
      delegate_to: localhost
      become: false