From 4a4c61308a5132c89c3a16fea8b375a1ca2565c8 Mon Sep 17 00:00:00 2001 From: counterweight Date: Sun, 26 Oct 2025 23:39:02 +0100 Subject: [PATCH] temp monitor --- .gitignore | 8 +- 01_infra_setup.md | 15 ++ .../nodito/30_proxmox_bootstrap_playbook.yml | 128 +++++++++++ ansible/infra/nodito/40_cpu_temp_alerts.yml | 203 ++++++++++++++++++ ansible/infra/nodito/nodito_vars.yml | 13 ++ 5 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 ansible/infra/nodito/30_proxmox_bootstrap_playbook.yml create mode 100644 ansible/infra/nodito/40_cpu_temp_alerts.yml create mode 100644 ansible/infra/nodito/nodito_vars.yml diff --git a/.gitignore b/.gitignore index f8fe8c8..312de7e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ inventory.ini venv/* -.env \ No newline at end of file +.env + +# Secrets and sensitive files +*_secrets.yml +*_secrets.yaml +secrets/ +.secrets/ diff --git a/01_infra_setup.md b/01_infra_setup.md index db9e270..9474c2f 100644 --- a/01_infra_setup.md +++ b/01_infra_setup.md @@ -74,6 +74,21 @@ Note that, by applying these playbooks, both the root user and the `counterweigh Note that, by applying these playbooks, both the root user and the `counterweight` user will use the same SSH pubkey for auth, but root login will be disabled. +### Deploy CPU Temperature Monitoring + +* The nodito server can be configured with CPU temperature monitoring that sends alerts to Uptime Kuma when temperatures exceed a threshold. +* Before running the CPU temperature monitoring playbook, you need to create a secrets file with your Uptime Kuma push URL: + * Create `ansible/infra/nodito/nodito_secrets.yml` with: + ```yaml + uptime_kuma_url: "https://your-uptime-kuma.com/api/push/your-push-key" + ``` +* Run the CPU temperature monitoring setup with: `ansible-playbook -i inventory.ini infra/nodito/40_cpu_temp_alerts.yml` +* This will: + * Install required packages (lm-sensors, curl, jq, bc) + * Create a monitoring script that checks CPU temperature every minute + * Set up a systemd service and timer for automated monitoring + * Send alerts to Uptime Kuma when temperature exceeds the threshold (default: 80°C) + ## GPG Keys Some of the backups are stored encrypted for security. To allow this, fill in the gpg variables listed in `example.inventory.ini` under the `lapy` block. diff --git a/ansible/infra/nodito/30_proxmox_bootstrap_playbook.yml b/ansible/infra/nodito/30_proxmox_bootstrap_playbook.yml new file mode 100644 index 0000000..842d2c0 --- /dev/null +++ b/ansible/infra/nodito/30_proxmox_bootstrap_playbook.yml @@ -0,0 +1,128 @@ +- name: Bootstrap Nodito SSH Key Access + hosts: nodito + become: true + vars_files: + - ../infra_vars.yml + + tasks: + - name: Install sudo package + package: + name: sudo + state: present + + - name: Ensure SSH directory exists for root + file: + path: /root/.ssh + state: directory + mode: "0700" + owner: root + group: root + + - name: Install SSH public key for root + authorized_key: + user: root + key: "{{ lookup('file', ansible_ssh_private_key_file + '.pub') }}" + state: present + + - name: Ensure SSH key-based authentication is enabled + lineinfile: + path: /etc/ssh/sshd_config + regexp: "^#?PubkeyAuthentication" + line: "PubkeyAuthentication yes" + state: present + backrefs: yes + + - name: Ensure AuthorizedKeysFile is properly configured + lineinfile: + path: /etc/ssh/sshd_config + regexp: "^#?AuthorizedKeysFile" + line: "AuthorizedKeysFile .ssh/authorized_keys" + state: present + backrefs: yes + + - name: Restart SSH service + service: + name: ssh + state: restarted + + - name: Wait for SSH to be ready + wait_for: + port: "{{ ssh_port }}" + host: "{{ ansible_host }}" + delay: 2 + timeout: 30 + + - name: Test SSH key authentication + command: whoami + register: ssh_key_test + changed_when: false + + - name: Verify SSH key authentication works + assert: + that: + - ssh_key_test.stdout == "root" + fail_msg: "SSH key authentication failed - expected 'root', got '{{ ssh_key_test.stdout }}'" + + - name: Create new user + user: + name: "{{ new_user }}" + groups: sudo + shell: /bin/bash + state: present + create_home: yes + + - name: Set up SSH directory for new user + file: + path: "/home/{{ new_user }}/.ssh" + state: directory + mode: "0700" + owner: "{{ new_user }}" + group: "{{ new_user }}" + + - name: Install SSH public key for new user + authorized_key: + user: "{{ new_user }}" + key: "{{ lookup('file', ansible_ssh_private_key_file + '.pub') }}" + state: present + + - name: Allow new user to run sudo without password + copy: + dest: "/etc/sudoers.d/{{ new_user }}" + content: "{{ new_user }} ALL=(ALL) NOPASSWD:ALL" + owner: root + group: root + mode: "0440" + + - name: Disable root login + lineinfile: + path: /etc/ssh/sshd_config + regexp: "^#?PermitRootLogin .*" + line: "PermitRootLogin no" + state: present + backrefs: yes + + - name: Disable password authentication + lineinfile: + path: /etc/ssh/sshd_config + regexp: "^#?PasswordAuthentication .*" + line: "PasswordAuthentication no" + state: present + backrefs: yes + + - name: Restart SSH service + service: + name: ssh + state: restarted + + - name: Wait for SSH to be ready + wait_for: + port: "{{ ssh_port }}" + host: "{{ ansible_host }}" + delay: 2 + timeout: 30 + + - name: Test connection with new user + command: whoami + become_user: "{{ new_user }}" + register: new_user_test + changed_when: false diff --git a/ansible/infra/nodito/40_cpu_temp_alerts.yml b/ansible/infra/nodito/40_cpu_temp_alerts.yml new file mode 100644 index 0000000..bbcde23 --- /dev/null +++ b/ansible/infra/nodito/40_cpu_temp_alerts.yml @@ -0,0 +1,203 @@ +- name: Deploy Nodito CPU Temperature Monitoring + hosts: nodito + become: yes + vars_files: + - ../../infra_vars.yml + - ./nodito_vars.yml + - ./nodito_secrets.yml + + tasks: + - name: Validate Uptime Kuma URL is provided + assert: + that: + - nodito_uptime_kuma_cpu_temp_push_url != "" + fail_msg: "uptime_kuma_url must be set in nodito_secrets.yml" + + - name: Install required packages for temperature monitoring + package: + name: + - lm-sensors + - curl + - jq + - bc + state: present + + - name: Create monitoring script directory + file: + path: "{{ monitoring_script_dir }}" + state: directory + owner: root + group: root + mode: '0755' + + - name: Create CPU temperature monitoring script + copy: + dest: "{{ monitoring_script_path }}" + content: | + #!/bin/bash + + # CPU Temperature Monitoring Script for Nodito + # Monitors CPU temperature and sends alerts to Uptime Kuma + + LOG_FILE="{{ log_file }}" + TEMP_THRESHOLD="{{ temp_threshold_celsius }}" + UPTIME_KUMA_URL="{{ nodito_uptime_kuma_cpu_temp_push_url }}" + + # Function to log messages + log_message() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + # Function to get CPU temperature + get_cpu_temp() { + # Try different methods to get CPU temperature + local temp="" + + # Method 1: sensors command (most common) + if command -v sensors >/dev/null 2>&1; then + temp=$(sensors 2>/dev/null | grep -E "Core 0|Package id 0|Tdie|Tctl" | head -1 | grep -oE '[0-9]+\.[0-9]+°C' | grep -oE '[0-9]+\.[0-9]+') + fi + + # Method 2: thermal zone (fallback) + if [ -z "$temp" ] && [ -f /sys/class/thermal/thermal_zone0/temp ]; then + temp=$(cat /sys/class/thermal/thermal_zone0/temp) + temp=$(echo "scale=1; $temp/1000" | bc -l 2>/dev/null || echo "$temp") + fi + + # Method 3: acpi (fallback) + if [ -z "$temp" ] && command -v acpi >/dev/null 2>&1; then + temp=$(acpi -t 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) + fi + + echo "$temp" + } + + # Function to send alert to Uptime Kuma + send_uptime_kuma_alert() { + local temp="$1" + local message="CPU Temperature Alert: ${temp}°C (Threshold: ${TEMP_THRESHOLD}°C)" + + log_message "ALERT: $message" + + # Send push notification to Uptime Kuma + encoded_message=$(printf '%s\n' "$message" | sed 's/ /%20/g; s/°/%C2%B0/g; s/(/%28/g; s/)/%29/g; s/:/%3A/g') + curl "$UPTIME_KUMA_URL?status=up&msg=$encoded_message" + + if [ $? -eq 0 ]; then + log_message "Alert sent successfully to Uptime Kuma" + else + log_message "ERROR: Failed to send alert to Uptime Kuma" + fi + } + + # Main monitoring logic + main() { + log_message "Starting CPU temperature check" + + # Get current CPU temperature + current_temp=$(get_cpu_temp) + + if [ -z "$current_temp" ]; then + log_message "ERROR: Could not read CPU temperature" + exit 1 + fi + + log_message "Current CPU temperature: ${current_temp}°C" + + # Check if temperature exceeds threshold + if (( $(echo "$current_temp > $TEMP_THRESHOLD" | bc -l) )); then + log_message "WARNING: CPU temperature ${current_temp}°C exceeds threshold ${TEMP_THRESHOLD}°C" + send_uptime_kuma_alert "$current_temp" + else + log_message "CPU temperature is within normal range" + fi + } + + # Run main function + main + owner: root + group: root + mode: '0755' + + - name: Create systemd service for CPU temperature monitoring + copy: + dest: "/etc/systemd/system/{{ systemd_service_name }}.service" + content: | + [Unit] + Description=Nodito CPU Temperature Monitor + After=network.target + + [Service] + Type=oneshot + ExecStart={{ monitoring_script_path }} + User=root + StandardOutput=journal + StandardError=journal + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + - name: Create systemd timer for CPU temperature monitoring + copy: + dest: "/etc/systemd/system/{{ systemd_service_name }}.timer" + content: | + [Unit] + Description=Run Nodito CPU Temperature Monitor every {{ temp_check_interval_minutes }} minute(s) + Requires={{ systemd_service_name }}.service + + [Timer] + OnBootSec={{ temp_check_interval_minutes }}min + OnUnitActiveSec={{ temp_check_interval_minutes }}min + Persistent=true + + [Install] + WantedBy=timers.target + owner: root + group: root + mode: '0644' + + - name: Reload systemd daemon + systemd: + daemon_reload: yes + + - name: Enable and start CPU temperature monitoring timer + systemd: + name: "{{ systemd_service_name }}.timer" + enabled: yes + state: started + + - name: Test CPU temperature monitoring script + command: "{{ monitoring_script_path }}" + register: script_test + changed_when: false + + - name: Verify script execution + assert: + that: + - script_test.rc == 0 + fail_msg: "CPU temperature monitoring script failed to execute properly" + + - name: Check if sensors are available + command: sensors + register: sensors_check + changed_when: false + failed_when: false + + - name: Display sensor information + debug: + msg: "Sensor information: {{ sensors_check.stdout_lines if sensors_check.rc == 0 else 'Sensors not available - using fallback methods' }}" + + - name: Show monitoring configuration + debug: + msg: + - "CPU Temperature Monitoring configured successfully" + - "Temperature threshold: {{ temp_threshold_celsius }}°C" + - "Check interval: {{ temp_check_interval_minutes }} minute(s)" + - "Uptime Kuma URL: {{ nodito_uptime_kuma_cpu_temp_push_url }}" + - "Monitoring script: {{ monitoring_script_path }}" + - "Log file: {{ log_file }}" + - "Service: {{ systemd_service_name }}.service" + - "Timer: {{ systemd_service_name }}.timer" diff --git a/ansible/infra/nodito/nodito_vars.yml b/ansible/infra/nodito/nodito_vars.yml new file mode 100644 index 0000000..c4a8ebd --- /dev/null +++ b/ansible/infra/nodito/nodito_vars.yml @@ -0,0 +1,13 @@ +# Nodito CPU Temperature Monitoring Configuration + +# Temperature Monitoring Configuration +temp_threshold_celsius: 80 +temp_check_interval_minutes: 1 + +# Script Configuration +monitoring_script_dir: /opt/nodito-monitoring +monitoring_script_path: "{{ monitoring_script_dir }}/cpu_temp_monitor.sh" +log_file: "{{ monitoring_script_dir }}/cpu_temp_monitor.log" + +# System Configuration +systemd_service_name: nodito-cpu-temp-monitor