stuff
This commit is contained in:
parent
c8754e1bdc
commit
fbbeb59c0e
28 changed files with 907 additions and 995 deletions
|
|
@ -1,203 +0,0 @@
|
|||
# Play: deploy CPU temperature monitoring to the `nodito` inventory target.
# Runs with privilege escalation and loads variables from the shared infra
# vars file plus the nodito-specific vars and secrets files.
- name: Deploy Nodito CPU Temperature Monitoring
  hosts: nodito
  become: true
  vars_files:
    - ../../infra_vars.yml
    - ./nodito_vars.yml
    - ./nodito_secrets.yml

  tasks:
|
||||
# Fail fast when the push URL is missing, null, empty or whitespace-only.
# The failure message names the exact variable the operator has to set.
- name: Validate Uptime Kuma URL is provided
  ansible.builtin.assert:
    that:
      - nodito_uptime_kuma_cpu_temp_push_url is defined
      - nodito_uptime_kuma_cpu_temp_push_url | default('', true) | string | trim | length > 0
    fail_msg: "nodito_uptime_kuma_cpu_temp_push_url must be set in nodito_secrets.yml"
|
||||
|
||||
# Install the command-line tools used for monitoring. The script deployed by
# this playbook calls `sensors` (lm-sensors), `curl` and `bc`; `jq` is
# installed as well but is not referenced by that script.
- name: Install required packages for temperature monitoring
  ansible.builtin.package:
    state: present
    name:
      - lm-sensors
      - curl
      - jq
      - bc
|
||||
|
||||
# Ensure the script directory exists, owned by root with mode 0755.
# Presumably this is the parent directory of monitoring_script_path;
# verify against the vars files.
- name: Create monitoring script directory
  ansible.builtin.file:
    state: directory
    path: "{{ monitoring_script_dir }}"
    mode: "0755"
    owner: root
    group: root
|
||||
|
||||
# Render the monitoring script onto the host. The three Jinja variables below
# are substituted at deploy time; everything else is literal bash.
# NOTE(review): the rendered script embeds the secret push URL and is
# installed world-readable (0755); consider a tighter mode if local users
# must not see it.
- name: Create CPU temperature monitoring script
  ansible.builtin.copy:
    dest: "{{ monitoring_script_path }}"
    owner: root
    group: root
    mode: "0755"
    content: |
      #!/bin/bash

      # CPU Temperature Monitoring Script for Nodito
      # Reads the CPU temperature, logs it, and pushes an alert to Uptime Kuma
      # when the reading exceeds the configured threshold.

      LOG_FILE="{{ log_file }}"
      TEMP_THRESHOLD="{{ temp_threshold_celsius }}"
      UPTIME_KUMA_URL="{{ nodito_uptime_kuma_cpu_temp_push_url }}"

      # Append a timestamped message ($1) to the log file.
      log_message() {
          echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE"
      }

      # Print the CPU temperature in degrees Celsius (e.g. "45.0") on stdout,
      # or an empty line when no source could be read. Sources are tried in
      # order: sensors, thermal_zone0, acpi.
      get_cpu_temp() {
          local temp=""
          local raw=""

          # Method 1: sensors command (most common).
          # The matched line often carries "high"/"crit" limits as well, and
          # grep -o prints every match on its own line, so keep only the first
          # reading; otherwise the caller receives a multi-line value.
          if command -v sensors >/dev/null 2>&1; then
              temp=$(sensors 2>/dev/null \
                  | grep -E "Core 0|Package id 0|Tdie|Tctl" | head -1 \
                  | grep -oE '[0-9]+\.[0-9]+°C' | head -1 \
                  | grep -oE '[0-9]+\.[0-9]+')
          fi

          # Method 2: thermal zone (fallback). The value is divided by 1000
          # using shell integer arithmetic, so a failing bc can no longer
          # leak the undivided number through as if it were degrees.
          if [ -z "$temp" ] && [ -f /sys/class/thermal/thermal_zone0/temp ]; then
              raw=$(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null)
              if [[ "$raw" =~ ^[0-9]+$ ]]; then
                  temp="$(( 10#$raw / 1000 )).$(( (10#$raw % 1000) / 100 ))"
              fi
          fi

          # Method 3: acpi (fallback)
          if [ -z "$temp" ] && command -v acpi >/dev/null 2>&1; then
              temp=$(acpi -t 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1)
          fi

          echo "$temp"
      }

      # Log an alert for temperature $1 and push it to Uptime Kuma.
      send_uptime_kuma_alert() {
          local temp="$1"
          local message="CPU Temperature Alert: ${temp}°C (Threshold: ${TEMP_THRESHOLD}°C)"

          log_message "ALERT: $message"

          # NOTE(review): the push is sent with status=up even though this is
          # an alert; confirm the Uptime Kuma monitor is set up to expect that.
          #
          # --get with --data/--data-urlencode makes curl build the query
          # string and percent-encode the whole message. --fail turns HTTP
          # error responses into a non-zero exit status, and --max-time keeps
          # a hung connection from blocking the oneshot service.
          if curl --fail --silent --show-error --max-time 10 --output /dev/null \
              --get --data "status=up" --data-urlencode "msg=$message" \
              "$UPTIME_KUMA_URL"; then
              log_message "Alert sent successfully to Uptime Kuma"
          else
              log_message "ERROR: Failed to send alert to Uptime Kuma"
          fi
      }

      # Main monitoring logic: read, log, compare against the threshold.
      # Exits 1 when the temperature cannot be read or compared.
      main() {
          local current_temp=""
          local exceeded=""

          log_message "Starting CPU temperature check"

          # Get current CPU temperature
          current_temp=$(get_cpu_temp)

          if [ -z "$current_temp" ]; then
              log_message "ERROR: Could not read CPU temperature"
              exit 1
          fi

          log_message "Current CPU temperature: ${current_temp}°C"

          # bc prints 1 when the comparison holds and 0 when it does not;
          # anything else means the reading or the threshold was not numeric.
          exceeded=$(echo "$current_temp > $TEMP_THRESHOLD" | bc -l 2>/dev/null)

          case "$exceeded" in
              1)
                  log_message "WARNING: CPU temperature ${current_temp}°C exceeds threshold ${TEMP_THRESHOLD}°C"
                  send_uptime_kuma_alert "$current_temp"
                  ;;
              0)
                  log_message "CPU temperature is within normal range"
                  ;;
              *)
                  log_message "ERROR: Could not compare temperature '${current_temp}' with threshold '${TEMP_THRESHOLD}'"
                  exit 1
                  ;;
          esac
      }

      # Run main function
      main
|
||||
|
||||
# Write the oneshot service unit that runs the monitoring script once per
# activation, as root, with stdout/stderr sent to the journal.
- name: Create systemd service for CPU temperature monitoring
  ansible.builtin.copy:
    dest: "/etc/systemd/system/{{ systemd_service_name }}.service"
    mode: "0644"
    owner: root
    group: root
    content: |
      [Unit]
      Description=Nodito CPU Temperature Monitor
      After=network.target

      [Service]
      Type=oneshot
      ExecStart={{ monitoring_script_path }}
      User=root
      StandardOutput=journal
      StandardError=journal

      [Install]
      WantedBy=multi-user.target
|
||||
|
||||
# Write the timer unit that triggers the monitoring service
# temp_check_interval_minutes after boot and then again that many minutes
# after each activation of the service.
#
# `Persistent=true` was dropped: per systemd.timer(5) it only affects timers
# configured with OnCalendar=, so it did nothing on this monotonic timer.
#
# NOTE(review): Requires= also pulls the service in whenever the timer unit
# itself is started; a timer already activates its same-named service by
# default, so this line may be unnecessary. Confirm before removing.
- name: Create systemd timer for CPU temperature monitoring
  ansible.builtin.copy:
    dest: "/etc/systemd/system/{{ systemd_service_name }}.timer"
    owner: root
    group: root
    mode: "0644"
    content: |
      [Unit]
      Description=Run Nodito CPU Temperature Monitor every {{ temp_check_interval_minutes }} minute(s)
      Requires={{ systemd_service_name }}.service

      [Timer]
      OnBootSec={{ temp_check_interval_minutes }}min
      OnUnitActiveSec={{ temp_check_interval_minutes }}min

      [Install]
      WantedBy=timers.target
|
||||
|
||||
# Ask systemd to re-read its unit files from disk.
- name: Reload systemd daemon
  ansible.builtin.systemd:
    daemon_reload: true
|
||||
|
||||
# Enable the timer unit and make sure it is started now.
- name: Enable and start CPU temperature monitoring timer
  ansible.builtin.systemd:
    name: "{{ systemd_service_name }}.timer"
    state: started
    enabled: true
|
||||
|
||||
# Run the deployed script once as a smoke test. The run itself never fails
# the play (failed_when: false), so the assert that follows is the one place
# where a non-zero exit code is reported, with a message that says where to
# look. Without this, a failing command would abort the play before the
# assert could ever run.
- name: Test CPU temperature monitoring script
  ansible.builtin.command: "{{ monitoring_script_path }}"
  register: script_test
  changed_when: false
  failed_when: false

- name: Verify script execution
  ansible.builtin.assert:
    that:
      - script_test.rc | default(1) == 0
    fail_msg: >-
      CPU temperature monitoring script failed to execute properly
      (rc={{ script_test.rc | default('n/a') }});
      see {{ log_file }} for details
|
||||
|
||||
# Informational probe: run `sensors` and keep the result for the debug task.
# The task is never marked failed or changed, whatever the command returns.
- name: Check if sensors are available
  ansible.builtin.command:
    cmd: sensors
  register: sensors_check
  failed_when: false
  changed_when: false
|
||||
|
||||
# Print the `sensors` output when the probe returned rc 0; otherwise print a
# note that the fallback methods are in use.
- name: Display sensor information
  ansible.builtin.debug:
    msg: "Sensor information: {{ sensors_check.stdout_lines if sensors_check.rc == 0 else 'Sensors not available - using fallback methods' }}"
|
||||
|
||||
# Summarize the deployed configuration for the operator.
# The Uptime Kuma push URL comes from nodito_secrets.yml, so its value is
# deliberately not echoed into play output or logs.
- name: Show monitoring configuration
  ansible.builtin.debug:
    msg:
      - "CPU Temperature Monitoring configured successfully"
      - "Temperature threshold: {{ temp_threshold_celsius }}°C"
      - "Check interval: {{ temp_check_interval_minutes }} minute(s)"
      - "Uptime Kuma URL: (set in nodito_secrets.yml; value not shown)"
      - "Monitoring script: {{ monitoring_script_path }}"
      - "Log file: {{ log_file }}"
      - "Service: {{ systemd_service_name }}.service"
      - "Timer: {{ systemd_service_name }}.timer"
|
||||
Loading…
Add table
Add a link
Reference in a new issue