From fbbeb59c0e9f7c48e40948d7c36798e90c245887 Mon Sep 17 00:00:00 2001
From: counterweight
Date: Fri, 14 Nov 2025 23:36:00 +0100
Subject: [PATCH] Consolidate infra monitoring, centralize service settings,
 add Forgejo backups and Layer 8 script

---
 01_infra_setup.md                             |  71 ++-
 02_vps_core_services_setup.md                 |  37 +-
 DEPENDENCY_GRAPH.md                           | 419 ------------------
 ansible/infra/410_disk_usage_alerts.yml       |   3 +-
 ansible/infra/420_system_healthcheck.yml      |   3 +-
 ansible/infra/430_cpu_temp_alerts.yml         | 316 ++++++++++++++
 ansible/infra/920_join_headscale_mesh.yml     |   4 +-
 ansible/infra/nodito/40_cpu_temp_alerts.yml   | 203 ---------
 ansible/services/forgejo/forgejo_vars.yml     |   4 +
 .../forgejo/setup_backup_forgejo_to_lapy.yml  |  86 ++++
 .../headscale/deploy_headscale_playbook.yml   |   1 +
 ansible/services/headscale/headscale_vars.yml |   5 +-
 .../deploy_ntfy_emergency_app_playbook.yml    |   5 +
 .../ntfy_emergency_app_vars.yml               |   3 -
 ansible/services/ntfy/ntfy_vars.yml           |   3 +-
 .../setup_ntfy_uptime_kuma_notification.yml   |   2 +-
 .../deploy_personal_blog_playbook.yml         | 105 -----
 .../personal-blog/personal_blog_vars.yml      |   6 -
 ansible/services_config.yml                   |   8 +-
 ansible/services_config.yml.example           |   8 +-
 human_script.md                               |  45 +-
 scripts/README.md                             | 140 ------
 scripts/setup_layer_6_infra_monitoring.sh     |  32 +-
 scripts/setup_layer_8_secondary_services.sh   | 363 +++++++++++++++
 tofu/nodito/README.md                         |   8 +
 tofu/nodito/main.tf                           |  10 +
 tofu/nodito/terraform.tfvars.example          |   7 +
 tofu/nodito/variables.tf                      |   5 +
 28 files changed, 907 insertions(+), 995 deletions(-)
 delete mode 100644 DEPENDENCY_GRAPH.md
 create mode 100644 ansible/infra/430_cpu_temp_alerts.yml
 delete mode 100644 ansible/infra/nodito/40_cpu_temp_alerts.yml
 create mode 100644 ansible/services/forgejo/setup_backup_forgejo_to_lapy.yml
 delete mode 100644 ansible/services/personal-blog/deploy_personal_blog_playbook.yml
 delete mode 100644 ansible/services/personal-blog/personal_blog_vars.yml
 delete mode 100644 scripts/README.md
 create mode 100755 scripts/setup_layer_8_secondary_services.sh

diff --git a/01_infra_setup.md b/01_infra_setup.md
index 52bb3f9..b5a3630 100644
--- a/01_infra_setup.md
+++ b/01_infra_setup.md
@@ -89,20 +89,19 @@ Note that, by applying these playbooks, both the root user and the `counterweigh
 * Verify the changes are working correctly
 * After running this playbook, clear your browser cache or perform a hard reload (Ctrl+Shift+R) before using the Proxmox VE Web UI to avoid UI display issues.
 
-### Deploy CPU Temperature Monitoring
+### Deploy Infra Monitoring (Disk, Health, CPU Temp)
 
-* The nodito server can be configured with CPU temperature monitoring that sends alerts to Uptime Kuma when temperatures exceed a threshold.
-* Before running the CPU temperature monitoring playbook, you need to create a secrets file with your Uptime Kuma push URL:
-  * Create `ansible/infra/nodito/nodito_secrets.yml` with:
-    ```yaml
-    uptime_kuma_url: "https://your-uptime-kuma.com/api/push/your-push-key"
-    ```
-* Run the CPU temperature monitoring setup with: `ansible-playbook -i inventory.ini infra/nodito/40_cpu_temp_alerts.yml`
-* This will:
-  * Install required packages (lm-sensors, curl, jq, bc)
-  * Create a monitoring script that checks CPU temperature every minute
-  * Set up a systemd service and timer for automated monitoring
-  * Send alerts to Uptime Kuma when temperature exceeds the threshold (default: 80°C)
+* Nodito can run the same monitoring stack used elsewhere: disk usage, heartbeat healthcheck, and CPU temperature alerts feeding Uptime Kuma.
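+* Prerequisites (the same ones the previous per-playbook docs listed; each playbook drives the Uptime Kuma API from lapy):
+  * Install the Uptime Kuma Ansible collection: `ansible-galaxy collection install -r ansible/requirements.yml`
+  * Install Python dependencies: `pip install -r requirements.txt` (includes uptime-kuma-api)
+  * Set up `ansible/infra_secrets.yml` with your Uptime Kuma credentials (see "Set up Infrastructure Secrets" below)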
+* Playbooks to run (in any order):
+  * `ansible-playbook -i inventory.ini infra/410_disk_usage_alerts.yml`
+  * `ansible-playbook -i inventory.ini infra/420_system_healthcheck.yml`
+  * `ansible-playbook -i inventory.ini infra/430_cpu_temp_alerts.yml`
+* Each playbook automatically:
+  * Creates/updates the corresponding monitor in Uptime Kuma (including ntfy notification wiring)
+  * Installs any required packages (curl, lm-sensors, jq, bc, etc.)
+  * Creates the monitoring script(s) and log files
+  * Sets up systemd services and timers for automated runs
+  * Sends alerts to Uptime Kuma when thresholds are exceeded or heartbeats stop
 
 ### Setup ZFS Storage Pool
 
@@ -131,6 +130,26 @@ Note that, by applying these playbooks, both the root user and the `counterweigh
   * Enable ZFS services for automatic pool import on boot
 * **Warning**: This will destroy all data on the specified disks. Make sure you're using the correct disk IDs and that the disks don't contain important data.
 
+### Build Debian Cloud Template for Proxmox
+
+* After storage is ready, create a reusable Debian cloud template so future Proxmox VMs can be cloned in seconds.
+* Run: `ansible-playbook -i inventory.ini infra/nodito/33_proxmox_debian_cloud_template.yml`
+* This playbook:
+  * Downloads the latest Debian generic cloud qcow2 image (override via `debian_cloud_image_url`/`debian_cloud_image_filename`)
+  * Imports it into your Proxmox storage (defaults to the configured ZFS pool) and builds VMID `9001` as a template
+  * Injects your SSH keys, enables qemu-guest-agent, configures DHCP networking, and sizes the disk (default 10 GB)
+  * Drops a cloud-init snippet so clones automatically install qemu-guest-agent and can run upgrades on first boot
+* Once it finishes, provision new machines with `qm clone 9001 <new-vmid> --name <name>` plus your usual cloud-init overrides.
+
+### Provision VMs with OpenTofu
+
+* Prefer a declarative workflow? The `tofu/nodito` project clones VMs from the template based on declarative definitions.
+* Quick start (see `tofu/nodito/README.md` for full details):
+  1. Install OpenTofu, copy `terraform.tfvars.example` to `terraform.tfvars`, and fill in the Proxmox API URL/token plus your SSH public key.
+  2. Define VMs in the `vms` map (name, cores, memory, disk size, `ipconfig0`, optional `vlan_tag`). Disks default to the `proxmox-tank-1` ZFS pool.
+  3. Run `tofu init`, `tofu plan -var-file=terraform.tfvars`, and `tofu apply -var-file=terraform.tfvars`.
+* Each VM is cloned from the `debian-13-cloud-init` template (VMID 9001), attaches to `vmbr0`, and boots with qemu-guest-agent + your keys injected via cloud-init. Updates to the tfvars map let you grow/shrink the fleet with a single `tofu apply`.
+
 ## General prep for all machines
 
 ### Set up Infrastructure Secrets
 
@@ -146,32 +165,6 @@ Note that, by applying these playbooks, both the root user and the `counterweigh
 ```
 * **Important**: Never commit this file to version control (it's in `.gitignore`)
 
-### Deploy Disk Usage Monitoring
-
-* Any machine can be configured with disk usage monitoring that sends alerts to Uptime Kuma when disk usage exceeds a threshold.
-* This playbook automatically creates an Uptime Kuma push monitor for each host (idempotent - won't create duplicates).
-* Prerequisites: - * Install the Uptime Kuma Ansible collection: `ansible-galaxy collection install -r ansible/requirements.yml` - * Install Python dependencies: `pip install -r requirements.txt` (includes uptime-kuma-api) - * Set up `ansible/infra_secrets.yml` with your Uptime Kuma API token (see above) - * Uptime Kuma must be deployed (the playbook automatically uses the URL from `uptime_kuma_vars.yml`) -* Run the disk monitoring setup with: - ```bash - ansible-playbook -i inventory.ini infra/410_disk_usage_alerts.yml - ``` -* This will: - * Create an Uptime Kuma monitor group per host named "{hostname} - infra" (idempotent) - * Create a push monitor in Uptime Kuma with "upside down" mode (no news is good news) - * Assign the monitor to the host's group for better organization - * Install required packages (curl, bc) - * Create a monitoring script that checks disk usage at configured intervals (default: 15 minutes) - * Set up a systemd service and timer for automated monitoring - * Send alerts to Uptime Kuma only when usage exceeds threshold (default: 80%) -* Optional configuration: - * Change threshold: `-e "disk_usage_threshold_percent=85"` - * Change check interval: `-e "disk_check_interval_minutes=10"` - * Monitor different mount point: `-e "monitored_mount_point=/home"` - ## GPG Keys Some of the backups are stored encrypted for security. To allow this, fill in the gpg variables listed in `example.inventory.ini` under the `lapy` block. diff --git a/02_vps_core_services_setup.md b/02_vps_core_services_setup.md index 1c4b708..7ba7337 100644 --- a/02_vps_core_services_setup.md +++ b/02_vps_core_services_setup.md @@ -181,49 +181,16 @@ ntfy-emergency-app is a simple web application that allows trusted people to sen ### Deploy -* Decide what subdomain you want to serve the emergency app on and add it to `services/ntfy-emergency-app/ntfy_emergency_app_vars.yml` on the `ntfy_emergency_app_subdomain`. +* Decide what subdomain you want to serve the emergency app on and update `ansible/services_config.yml` under `ntfy_emergency_app`. * Note that you will have to add a DNS entry to point to the VPS public IP. * Configure the ntfy settings in `ntfy_emergency_app_vars.yml`: * `ntfy_emergency_app_topic`: The ntfy topic to send messages to (default: "emergency") - * `ntfy_emergency_app_ntfy_url`: Your ntfy server URL (default: "https://ntfy.sh") - * `ntfy_emergency_app_ntfy_user`: Username for ntfy authentication (optional) - * `ntfy_emergency_app_ntfy_password`: Password for ntfy authentication (optional) * `ntfy_emergency_app_ui_message`: Custom message displayed in the web interface +* Ensure `infra_secrets.yml` contains `ntfy_username` and `ntfy_password` with the credentials the app should use. * Make sure docker is available on the host. * Run the deployment playbook: `ansible-playbook -i inventory.ini services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml`. -## Personal Blog - -Personal blog is a static website served directly by Caddy. - -### Deploy - -* Decide what subdomain you want to serve the blog on and add it to `services/personal-blog/personal_blog_vars.yml` on the `personal_blog_subdomain`. - * Note that you will have to add a DNS entry to point to the VPS public IP. 
-* Configure the git repository settings in `personal_blog_vars.yml`: - * `personal_blog_git_repo`: The HTTPS URL to your git repository (default: "https://forgejo.contrapeso.xyz/counterweight/pablohere.git") - * `personal_blog_source_folder`: The folder within the repo containing static files (default: "public") -* Set up a Forgejo deploy token: - * Go to your repository → Settings → Deploy Tokens - * Create a new token with "Read" permissions - * Copy the token (you won't see it again) -* Export the token as an environment variable: `export PERSONAL_BLOG_DEPLOY_TOKEN=your_token_here` -* Run the deployment playbook: `ansible-playbook -i inventory.ini services/personal-blog/deploy_personal_blog_playbook.yml`. - -### Configure - -* The blog will be automatically updated every hour via a cron job that pulls the latest changes from the git repository. -* Static files are served directly by Caddy from the configured webroot directory. -* No additional configuration is needed - the site will be available at your configured domain. - -### Updating content - -* Simply push changes to the `master` branch of your git repository. -* The cron job will automatically pull and deploy updates within an hour. -* For immediate updates, you can manually run: `/usr/local/bin/update-personal-blog.sh` on the server. - - ## Headscale Headscale is a self-hosted Tailscale control server that allows you to create your own Tailscale network. diff --git a/DEPENDENCY_GRAPH.md b/DEPENDENCY_GRAPH.md deleted file mode 100644 index 1ad628b..0000000 --- a/DEPENDENCY_GRAPH.md +++ /dev/null @@ -1,419 +0,0 @@ -# Infrastructure Dependency Graph - -This document maps out the dependencies between all infrastructure components and services, providing a clear order for building out the personal infrastructure. - -## Infrastructure Overview - -### Machines (Hosts) -- **lapy**: Laptop (Ansible control node) -- **vipy**: Main VPS (207.154.226.192) - hosts most services -- **watchtower**: Monitoring VPS (206.189.63.167) - hosts Uptime Kuma and ntfy -- **spacey**: Headscale VPS (165.232.73.4) - hosts Headscale coordination server -- **nodito**: Proxmox server (192.168.1.139) - home infrastructure -- **memos-box**: Separate box for memos (192.168.1.149) - ---- - -## Dependency Layers - -### Layer 0: Prerequisites (No Dependencies) -These must exist before anything else can be deployed. - -#### On lapy (Laptop - Ansible Control Node) -- Python venv with Ansible -- SSH keys configured -- Domain name configured (`root_domain` in `infra_vars.yml`) - -**Commands:** -```bash -python3 -m venv venv -source venv/bin/activate -pip install -r requirements.txt -ansible-galaxy collection install -r ansible/requirements.yml -``` - ---- - -### Layer 1: Basic Machine Setup (Depends on: Layer 0) -Initial machine provisioning and security hardening. - -#### All VPSs (vipy, watchtower, spacey) -**Playbooks (in order):** -1. `infra/01_user_and_access_setup_playbook.yml` - Create user, setup SSH -2. `infra/02_firewall_and_fail2ban_playbook.yml` - Firewall, fail2ban, auditd - -**Dependencies:** -- SSH access with root user -- SSH key pair - -#### Nodito (Proxmox Server) -**Playbooks (in order):** -1. `infra/nodito/30_proxmox_bootstrap_playbook.yml` - SSH keys, user creation, security -2. `infra/nodito/31_proxmox_community_repos_playbook.yml` - Switch to community repos -3. `infra/nodito/32_zfs_pool_setup_playbook.yml` - ZFS storage pool (optional) -4. 
`infra/nodito/33_proxmox_debian_cloud_template.yml` - Cloud template (optional) - -**Dependencies:** -- Root password access initially -- Disk IDs identified for ZFS (if using ZFS) - -#### Memos-box -**Playbooks:** -1. `infra/01_user_and_access_setup_playbook.yml` -2. `infra/02_firewall_and_fail2ban_playbook.yml` - ---- - -### Layer 2: General Infrastructure Tools (Depends on: Layer 1) -Common utilities needed across multiple services. - -#### On All Machines (as needed per service requirements) -**Playbooks:** -- `infra/900_install_rsync.yml` - For backup operations -- `infra/910_docker_playbook.yml` - For Docker-based services -- `infra/920_join_headscale_mesh.yml` - Join machines to VPN mesh (requires Layer 5 - Headscale) - -**Dependencies:** -- Layer 1 complete (user and firewall setup) - -**Notes:** -- rsync needed on: vipy, watchtower, lapy (for backups) -- docker needed on: vipy, watchtower (for containerized services) - ---- - -### Layer 3: Reverse Proxy (Depends on: Layer 2) -Caddy provides HTTPS termination and reverse proxying for all web services. - -#### On vipy, watchtower, spacey -**Playbook:** -- `services/caddy_playbook.yml` - -**Dependencies:** -- Layer 1 complete (firewall configured to allow ports 80/443) -- No other services required - -**Critical Note:** -- Caddy is deployed to vipy, watchtower, and spacey -- Each service deployed configures its own Caddy reverse proxy automatically -- All subsequent web services depend on Caddy being installed first - ---- - -### Layer 4: Core Monitoring & Notifications (Depends on: Layer 3) -These services provide monitoring and alerting for all other infrastructure. - -#### 4A: ntfy (Notification Service) -**Host:** watchtower -**Playbook:** `services/ntfy/deploy_ntfy_playbook.yml` - -**Dependencies:** -- Caddy on watchtower (Layer 3) -- DNS record for ntfy subdomain -- NTFY_USER and NTFY_PASSWORD environment variables - -**Used By:** -- Uptime Kuma (for notifications) -- ntfy-emergency-app -- Any service needing push notifications - -#### 4B: Uptime Kuma (Monitoring Platform) -**Host:** watchtower -**Playbook:** `services/uptime_kuma/deploy_uptime_kuma_playbook.yml` - -**Dependencies:** -- Caddy on watchtower (Layer 3) -- Docker on watchtower (Layer 2) -- DNS record for uptime kuma subdomain - -**Used By:** -- All infrastructure monitoring (disk alerts, healthchecks, CPU temp) -- Service availability monitoring - -**Backup:** `services/uptime_kuma/setup_backup_uptime_kuma_to_lapy.yml` -- Requires rsync on watchtower and lapy - ---- - -### Layer 5: VPN Infrastructure (Depends on: Layer 3) -Headscale provides secure mesh networking between all machines. - -#### Headscale (VPN Coordination Server) -**Host:** spacey -**Playbook:** `services/headscale/deploy_headscale_playbook.yml` - -**Dependencies:** -- Caddy on spacey (Layer 3) -- DNS record for headscale subdomain - -**Enables:** -- Secure communication between all machines -- Magic DNS for hostname resolution -- Join machines using: `infra/920_join_headscale_mesh.yml` - -**Backup:** `services/headscale/setup_backup_headscale_to_lapy.yml` -- Requires rsync on spacey and lapy - ---- - -### Layer 6: Infrastructure Monitoring (Depends on: Layer 4) -Automated monitoring scripts that report to Uptime Kuma. 
- -#### On All Machines -**Playbooks:** -- `infra/410_disk_usage_alerts.yml` - Disk usage monitoring -- `infra/420_system_healthcheck.yml` - System health pings - -**Dependencies:** -- Uptime Kuma deployed (Layer 4B) -- `infra_secrets.yml` with Uptime Kuma credentials -- Python uptime-kuma-api installed on lapy - -#### On Nodito Only -**Playbook:** -- `infra/nodito/40_cpu_temp_alerts.yml` - CPU temperature monitoring - -**Dependencies:** -- Uptime Kuma deployed (Layer 4B) -- `nodito_secrets.yml` with Uptime Kuma push URL - ---- - -### Layer 7: Core Services (Depends on: Layers 3-4) -Essential services for personal infrastructure. - -#### 7A: Vaultwarden (Password Manager) -**Host:** vipy -**Playbook:** `services/vaultwarden/deploy_vaultwarden_playbook.yml` - -**Dependencies:** -- Caddy on vipy (Layer 3) -- Docker on vipy (Layer 2) -- Fail2ban on vipy (Layer 1) -- DNS record for vaultwarden subdomain - -**Post-Deploy:** -- Create first user account -- Run `services/vaultwarden/disable_vaultwarden_sign_ups_playbook.yml` to disable registrations - -**Backup:** `services/vaultwarden/setup_backup_vaultwarden_to_lapy.yml` -- Requires rsync on vipy and lapy - -#### 7B: Forgejo (Git Server) -**Host:** vipy -**Playbook:** `services/forgejo/deploy_forgejo_playbook.yml` - -**Dependencies:** -- Caddy on vipy (Layer 3) -- DNS record for forgejo subdomain - -**Used By:** -- Personal blog (Layer 8) -- Any service pulling from git repos - -#### 7C: LNBits (Lightning Wallet) -**Host:** vipy -**Playbook:** `services/lnbits/deploy_lnbits_playbook.yml` - -**Dependencies:** -- Caddy on vipy (Layer 3) -- DNS record for lnbits subdomain -- Python 3.12 via pyenv -- Poetry for dependency management - -**Backup:** `services/lnbits/setup_backup_lnbits_to_lapy.yml` -- Requires rsync on vipy and lapy -- Backups are GPG encrypted (requires GPG keys configured) - ---- - -### Layer 8: Secondary Services (Depends on: Layer 7) -Services that depend on core services being available. - -#### 8A: Personal Blog (Static Site) -**Host:** vipy -**Playbook:** `services/personal-blog/deploy_personal_blog_playbook.yml` - -**Dependencies:** -- Caddy on vipy (Layer 3) -- Forgejo on vipy (Layer 7B) - blog content hosted in Forgejo repo -- rsync on vipy (Layer 2) -- DNS record for blog subdomain -- PERSONAL_BLOG_DEPLOY_TOKEN environment variable (Forgejo deploy token) - -**Notes:** -- Auto-updates hourly via cron from Forgejo repo -- Serves static files directly through Caddy - -#### 8B: ntfy-emergency-app -**Host:** vipy -**Playbook:** `services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml` - -**Dependencies:** -- Caddy on vipy (Layer 3) -- Docker on vipy (Layer 2) -- ntfy on watchtower (Layer 4A) -- DNS record for emergency app subdomain - -**Notes:** -- Configured with ntfy server URL and credentials -- Sends emergency notifications to ntfy topics - -#### 8C: Memos (Note-taking) -**Host:** memos-box -**Playbook:** `services/memos/deploy_memos_playbook.yml` - -**Dependencies:** -- Caddy on memos-box (Layer 3) -- DNS record for memos subdomain - ---- - -## Deployment Order Summary - -### Phase 1: Foundation -1. Setup lapy as Ansible control node -2. Configure domain and DNS -3. Deploy Layer 1 on all machines (users, firewall) -4. Deploy Layer 2 tools (rsync, docker as needed) - -### Phase 2: Web Infrastructure -5. Deploy Caddy (Layer 3) on vipy, watchtower, spacey - -### Phase 3: Monitoring Foundation -6. Deploy ntfy on watchtower (Layer 4A) -7. Deploy Uptime Kuma on watchtower (Layer 4B) -8. 
Configure Uptime Kuma with ntfy notifications - -### Phase 4: Mesh Network (Optional but Recommended) -9. Deploy Headscale on spacey (Layer 5) -10. Join machines to mesh using 920 playbook - -### Phase 5: Infrastructure Monitoring -11. Deploy disk usage alerts on all machines (Layer 6) -12. Deploy system healthcheck on all machines (Layer 6) -13. Deploy CPU temp alerts on nodito (Layer 6) - -### Phase 6: Core Services -14. Deploy Vaultwarden on vipy (Layer 7A) -15. Deploy Forgejo on vipy (Layer 7B) -16. Deploy LNBits on vipy (Layer 7C) - -### Phase 7: Secondary Services -17. Deploy Personal Blog on vipy (Layer 8A) -18. Deploy ntfy-emergency-app on vipy (Layer 8B) -19. Deploy Memos on memos-box (Layer 8C) - -### Phase 8: Backups -20. Configure all backup playbooks (to lapy) - ---- - -## Critical Dependencies Map - -``` -Legend: → (depends on) - -MONITORING CHAIN: - ntfy (Layer 4A) → Caddy (Layer 3) - Uptime Kuma (Layer 4B) → Caddy (Layer 3) + Docker (Layer 2) + ntfy (Layer 4A) - Disk Alerts (Layer 6) → Uptime Kuma (Layer 4B) - System Healthcheck (Layer 6) → Uptime Kuma (Layer 4B) - CPU Temp Alerts (Layer 6) → Uptime Kuma (Layer 4B) - -WEB SERVICES CHAIN: - Caddy (Layer 3) → Firewall configured (Layer 1) - Vaultwarden (Layer 7A) → Caddy (Layer 3) + Docker (Layer 2) - Forgejo (Layer 7B) → Caddy (Layer 3) - LNBits (Layer 7C) → Caddy (Layer 3) - Personal Blog (Layer 8A) → Caddy (Layer 3) + Forgejo (Layer 7B) - ntfy-emergency-app (Layer 8B) → Caddy (Layer 3) + Docker (Layer 2) + ntfy (Layer 4A) - Memos (Layer 8C) → Caddy (Layer 3) - -VPN CHAIN: - Headscale (Layer 5) → Caddy (Layer 3) - All machines can join mesh → Headscale (Layer 5) - -BACKUP CHAIN: - All backups → rsync (Layer 2) on source + lapy - LNBits backups → GPG keys configured on lapy -``` - ---- - -## Host-Service Matrix - -| Service | vipy | watchtower | spacey | nodito | memos-box | -|---------|------|------------|--------|--------|-----------| -| Caddy | ✓ | ✓ | ✓ | - | ✓ | -| Docker | ✓ | ✓ | - | - | - | -| Uptime Kuma | - | ✓ | - | - | - | -| ntfy | - | ✓ | - | - | - | -| Headscale | - | - | ✓ | - | - | -| Vaultwarden | ✓ | - | - | - | - | -| Forgejo | ✓ | - | - | - | - | -| LNBits | ✓ | - | - | - | - | -| Personal Blog | ✓ | - | - | - | - | -| ntfy-emergency-app | ✓ | - | - | - | - | -| Memos | - | - | - | - | ✓ | -| Disk Alerts | ✓ | ✓ | ✓ | ✓ | ✓ | -| System Healthcheck | ✓ | ✓ | ✓ | ✓ | ✓ | -| CPU Temp Alerts | - | - | - | ✓ | - | - ---- - -## Pre-Deployment Checklist - -### Before Starting -- [ ] SSH keys generated and added to VPS providers -- [ ] Domain name acquired and accessible -- [ ] Python venv created on lapy with Ansible installed -- [ ] `inventory.ini` created and populated with all host IPs -- [ ] `infra_vars.yml` configured with root domain -- [ ] All VPSs accessible via SSH as root initially - -### DNS Records to Configure -Create A records pointing to appropriate IPs: -- Uptime Kuma subdomain → watchtower IP -- ntfy subdomain → watchtower IP -- Headscale subdomain → spacey IP -- Vaultwarden subdomain → vipy IP -- Forgejo subdomain → vipy IP -- LNBits subdomain → vipy IP -- Personal Blog subdomain → vipy IP -- ntfy-emergency-app subdomain → vipy IP -- Memos subdomain → memos-box IP - -### Secrets to Configure -- [ ] `infra_secrets.yml` created with Uptime Kuma credentials -- [ ] `nodito_secrets.yml` created with Uptime Kuma push URL -- [ ] NTFY_USER and NTFY_PASSWORD environment variables for ntfy deployment -- [ ] PERSONAL_BLOG_DEPLOY_TOKEN environment variable (from Forgejo) -- [ ] GPG keys configured on lapy 
(for encrypted backups) - ---- - -## Notes - -### Why This Order Matters - -1. **Caddy First**: All web services need reverse proxy, so Caddy must be deployed before any service that requires HTTPS access. - -2. **Monitoring Early**: Deploying ntfy and Uptime Kuma early means all subsequent services can be monitored from the start. Infrastructure alerts can catch issues immediately. - -3. **Forgejo Before Blog**: The personal blog pulls content from Forgejo, so the git server must exist first. - -4. **Headscale Separation**: Headscale runs on its own VPS (spacey) because vipy needs to be part of the mesh network and can't run the coordination server itself. - -5. **Backup Setup Last**: Backups should be configured after services are stable and have initial data to backup. - -### Machine Isolation Strategy - -- **watchtower**: Runs monitoring services (Uptime Kuma, ntfy) separately so they don't fail when vipy fails -- **spacey**: Runs Headscale coordination server isolated from the mesh clients -- **vipy**: Main services server - most applications run here -- **nodito**: Local Proxmox server for home infrastructure -- **memos-box**: Separate dedicated server for memos service - -This isolation ensures monitoring remains functional even when primary services are down. - diff --git a/ansible/infra/410_disk_usage_alerts.yml b/ansible/infra/410_disk_usage_alerts.yml index 21d74a2..de02f53 100644 --- a/ansible/infra/410_disk_usage_alerts.yml +++ b/ansible/infra/410_disk_usage_alerts.yml @@ -5,8 +5,6 @@ - ../infra_vars.yml - ../services_config.yml - ../infra_secrets.yml - - ../services/uptime_kuma/uptime_kuma_vars.yml - - ../services/ntfy/ntfy_vars.yml vars: disk_usage_threshold_percent: 80 @@ -18,6 +16,7 @@ systemd_service_name: disk-usage-monitor # Uptime Kuma configuration (auto-configured from services_config.yml and infra_secrets.yml) uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" tasks: - name: Validate Uptime Kuma configuration diff --git a/ansible/infra/420_system_healthcheck.yml b/ansible/infra/420_system_healthcheck.yml index 22f399c..fa507bd 100644 --- a/ansible/infra/420_system_healthcheck.yml +++ b/ansible/infra/420_system_healthcheck.yml @@ -5,8 +5,6 @@ - ../infra_vars.yml - ../services_config.yml - ../infra_secrets.yml - - ../services/uptime_kuma/uptime_kuma_vars.yml - - ../services/ntfy/ntfy_vars.yml vars: healthcheck_interval_seconds: 60 # Send healthcheck every 60 seconds (1 minute) @@ -18,6 +16,7 @@ systemd_service_name: system-healthcheck # Uptime Kuma configuration (auto-configured from services_config.yml and infra_secrets.yml) uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" tasks: - name: Validate Uptime Kuma configuration diff --git a/ansible/infra/430_cpu_temp_alerts.yml b/ansible/infra/430_cpu_temp_alerts.yml new file mode 100644 index 0000000..d3c00be --- /dev/null +++ b/ansible/infra/430_cpu_temp_alerts.yml @@ -0,0 +1,316 @@ +- name: Deploy CPU Temperature Monitoring + hosts: nodito + become: yes + vars_files: + - ../infra_vars.yml + - ../services_config.yml + - ../infra_secrets.yml + + vars: + temp_threshold_celsius: 80 + temp_check_interval_minutes: 1 + monitoring_script_dir: /opt/nodito-monitoring + monitoring_script_path: "{{ monitoring_script_dir }}/cpu_temp_monitor.sh" + log_file: "{{ monitoring_script_dir }}/cpu_temp_monitor.log" + systemd_service_name: nodito-cpu-temp-monitor + uptime_kuma_api_url: 
"https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" + + tasks: + - name: Validate Uptime Kuma configuration + assert: + that: + - uptime_kuma_api_url is defined + - uptime_kuma_api_url != "" + - uptime_kuma_username is defined + - uptime_kuma_username != "" + - uptime_kuma_password is defined + - uptime_kuma_password != "" + fail_msg: "uptime_kuma_api_url, uptime_kuma_username and uptime_kuma_password must be set" + + - name: Get hostname for monitor identification + command: hostname + register: host_name + changed_when: false + + - name: Set monitor name and group based on hostname + set_fact: + monitor_name: "cpu-temp-{{ host_name.stdout }}" + monitor_friendly_name: "CPU Temperature: {{ host_name.stdout }}" + uptime_kuma_monitor_group: "{{ host_name.stdout }} - infra" + + - name: Create Uptime Kuma CPU temperature monitor setup script + copy: + dest: /tmp/setup_uptime_kuma_cpu_temp_monitor.py + content: | + #!/usr/bin/env python3 + import sys + import json + from uptime_kuma_api import UptimeKumaApi + + def main(): + api_url = sys.argv[1] + username = sys.argv[2] + password = sys.argv[3] + group_name = sys.argv[4] + monitor_name = sys.argv[5] + monitor_description = sys.argv[6] + interval = int(sys.argv[7]) + ntfy_topic = sys.argv[8] if len(sys.argv) > 8 else "alerts" + + api = UptimeKumaApi(api_url, timeout=60, wait_events=2.0) + api.login(username, password) + + monitors = api.get_monitors() + notifications = api.get_notifications() + + ntfy_notification = next((n for n in notifications if n.get('name') == f'ntfy ({ntfy_topic})'), None) + notification_id_list = {} + if ntfy_notification: + notification_id_list[ntfy_notification['id']] = True + + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + if not group: + api.add_monitor(type='group', name=group_name) + monitors = api.get_monitors() + group = next((m for m in monitors if m.get('name') == group_name and m.get('type') == 'group'), None) + + existing_monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + monitor_data = { + 'type': 'push', + 'name': monitor_name, + 'parent': group['id'], + 'interval': interval, + 'upsideDown': True, + 'description': monitor_description, + 'notificationIDList': notification_id_list + } + + if existing_monitor: + api.edit_monitor(existing_monitor['id'], **monitor_data) + else: + api.add_monitor(**monitor_data) + + monitors = api.get_monitors() + monitor = next((m for m in monitors if m.get('name') == monitor_name), None) + + result = { + 'monitor_id': monitor['id'], + 'push_token': monitor['pushToken'], + 'group_name': group_name, + 'group_id': group['id'], + 'monitor_name': monitor_name + } + print(json.dumps(result)) + + api.disconnect() + + if __name__ == '__main__': + main() + mode: '0755' + delegate_to: localhost + become: no + + - name: Run Uptime Kuma monitor setup script + command: > + {{ ansible_playbook_python }} + /tmp/setup_uptime_kuma_cpu_temp_monitor.py + "{{ uptime_kuma_api_url }}" + "{{ uptime_kuma_username }}" + "{{ uptime_kuma_password }}" + "{{ uptime_kuma_monitor_group }}" + "{{ monitor_name }}" + "{{ monitor_friendly_name }} - Alerts when temperature exceeds {{ temp_threshold_celsius }}°C" + "{{ (temp_check_interval_minutes * 60) + 60 }}" + "{{ ntfy_topic }}" + register: monitor_setup_result + delegate_to: localhost + become: no + changed_when: false + + - name: Parse monitor setup result + set_fact: + monitor_info_parsed: "{{ 
monitor_setup_result.stdout | from_json }}" + + - name: Set push URL and monitor ID as facts + set_fact: + uptime_kuma_cpu_temp_push_url: "{{ uptime_kuma_api_url }}/api/push/{{ monitor_info_parsed.push_token }}" + uptime_kuma_monitor_id: "{{ monitor_info_parsed.monitor_id }}" + + - name: Install required packages for temperature monitoring + package: + name: + - lm-sensors + - curl + - jq + - bc + state: present + + - name: Create monitoring script directory + file: + path: "{{ monitoring_script_dir }}" + state: directory + owner: root + group: root + mode: '0755' + + - name: Create CPU temperature monitoring script + copy: + dest: "{{ monitoring_script_path }}" + content: | + #!/bin/bash + + # CPU Temperature Monitoring Script + # Monitors CPU temperature and sends alerts to Uptime Kuma + + LOG_FILE="{{ log_file }}" + TEMP_THRESHOLD="{{ temp_threshold_celsius }}" + UPTIME_KUMA_URL="{{ uptime_kuma_cpu_temp_push_url }}" + + log_message() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + get_cpu_temp() { + local temp="" + + if command -v sensors >/dev/null 2>&1; then + temp=$(sensors 2>/dev/null | grep -E "Core 0|Package id 0|Tdie|Tctl" | head -1 | grep -oE '[0-9]+\.[0-9]+°C' | grep -oE '[0-9]+\.[0-9]+') + fi + + if [ -z "$temp" ] && [ -f /sys/class/thermal/thermal_zone0/temp ]; then + temp=$(cat /sys/class/thermal/thermal_zone0/temp) + temp=$(echo "scale=1; $temp/1000" | bc -l 2>/dev/null || echo "$temp") + fi + + if [ -z "$temp" ] && command -v acpi >/dev/null 2>&1; then + temp=$(acpi -t 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) + fi + + echo "$temp" + } + + send_uptime_kuma_alert() { + local temp="$1" + local message="CPU Temperature Alert: ${temp}°C (Threshold: ${TEMP_THRESHOLD}°C)" + + log_message "ALERT: $message" + + encoded_message=$(printf '%s\n' "$message" | sed 's/ /%20/g; s/°/%C2%B0/g; s/(/%28/g; s/)/%29/g; s/:/%3A/g') + response=$(curl -s -w "\n%{http_code}" "$UPTIME_KUMA_URL?status=up&msg=$encoded_message" 2>&1) + http_code=$(echo "$response" | tail -n1) + + if [ "$http_code" = "200" ] || [ "$http_code" = "201" ]; then + log_message "Alert sent successfully to Uptime Kuma (HTTP $http_code)" + else + log_message "ERROR: Failed to send alert to Uptime Kuma (HTTP $http_code)" + fi + } + + main() { + log_message "Starting CPU temperature check" + + current_temp=$(get_cpu_temp) + + if [ -z "$current_temp" ]; then + log_message "ERROR: Could not read CPU temperature" + exit 1 + fi + + log_message "Current CPU temperature: ${current_temp}°C" + + if (( $(echo "$current_temp > $TEMP_THRESHOLD" | bc -l) )); then + log_message "WARNING: CPU temperature ${current_temp}°C exceeds threshold ${TEMP_THRESHOLD}°C" + send_uptime_kuma_alert "$current_temp" + else + log_message "CPU temperature is within normal range" + fi + } + + main + owner: root + group: root + mode: '0755' + + - name: Create systemd service for CPU temperature monitoring + copy: + dest: "/etc/systemd/system/{{ systemd_service_name }}.service" + content: | + [Unit] + Description=CPU Temperature Monitor + After=network.target + + [Service] + Type=oneshot + ExecStart={{ monitoring_script_path }} + User=root + StandardOutput=journal + StandardError=journal + + [Install] + WantedBy=multi-user.target + owner: root + group: root + mode: '0644' + + - name: Create systemd timer for CPU temperature monitoring + copy: + dest: "/etc/systemd/system/{{ systemd_service_name }}.timer" + content: | + [Unit] + Description=Run CPU Temperature Monitor every {{ temp_check_interval_minutes }} minute(s) + Requires={{ 
systemd_service_name }}.service + + [Timer] + OnBootSec={{ temp_check_interval_minutes }}min + OnUnitActiveSec={{ temp_check_interval_minutes }}min + Persistent=true + + [Install] + WantedBy=timers.target + owner: root + group: root + mode: '0644' + + - name: Reload systemd daemon + systemd: + daemon_reload: yes + + - name: Enable and start CPU temperature monitoring timer + systemd: + name: "{{ systemd_service_name }}.timer" + enabled: yes + state: started + + - name: Test CPU temperature monitoring script + command: "{{ monitoring_script_path }}" + register: script_test + changed_when: false + + - name: Verify script execution + assert: + that: + - script_test.rc == 0 + fail_msg: "CPU temperature monitoring script failed to execute properly" + + - name: Display monitoring configuration + debug: + msg: + - "CPU Temperature Monitoring configured successfully" + - "Temperature threshold: {{ temp_threshold_celsius }}°C" + - "Check interval: {{ temp_check_interval_minutes }} minute(s)" + - "Monitor Name: {{ monitor_friendly_name }}" + - "Monitor Group: {{ uptime_kuma_monitor_group }}" + - "Uptime Kuma Push URL: {{ uptime_kuma_cpu_temp_push_url }}" + - "Monitoring script: {{ monitoring_script_path }}" + - "Systemd Service: {{ systemd_service_name }}.service" + - "Systemd Timer: {{ systemd_service_name }}.timer" + + - name: Clean up temporary Uptime Kuma setup script + file: + path: /tmp/setup_uptime_kuma_cpu_temp_monitor.py + state: absent + delegate_to: localhost + become: no + diff --git a/ansible/infra/920_join_headscale_mesh.yml b/ansible/infra/920_join_headscale_mesh.yml index 3611121..10675ae 100644 --- a/ansible/infra/920_join_headscale_mesh.yml +++ b/ansible/infra/920_join_headscale_mesh.yml @@ -3,9 +3,11 @@ become: yes vars_files: - ../infra_vars.yml - - ../services/headscale/headscale_vars.yml + - ../services_config.yml vars: + headscale_subdomain: "{{ subdomains.headscale }}" headscale_domain: "https://{{ headscale_subdomain }}.{{ root_domain }}" + headscale_namespace: "{{ service_settings.headscale.namespace }}" tasks: - name: Set headscale host diff --git a/ansible/infra/nodito/40_cpu_temp_alerts.yml b/ansible/infra/nodito/40_cpu_temp_alerts.yml deleted file mode 100644 index bbcde23..0000000 --- a/ansible/infra/nodito/40_cpu_temp_alerts.yml +++ /dev/null @@ -1,203 +0,0 @@ -- name: Deploy Nodito CPU Temperature Monitoring - hosts: nodito - become: yes - vars_files: - - ../../infra_vars.yml - - ./nodito_vars.yml - - ./nodito_secrets.yml - - tasks: - - name: Validate Uptime Kuma URL is provided - assert: - that: - - nodito_uptime_kuma_cpu_temp_push_url != "" - fail_msg: "uptime_kuma_url must be set in nodito_secrets.yml" - - - name: Install required packages for temperature monitoring - package: - name: - - lm-sensors - - curl - - jq - - bc - state: present - - - name: Create monitoring script directory - file: - path: "{{ monitoring_script_dir }}" - state: directory - owner: root - group: root - mode: '0755' - - - name: Create CPU temperature monitoring script - copy: - dest: "{{ monitoring_script_path }}" - content: | - #!/bin/bash - - # CPU Temperature Monitoring Script for Nodito - # Monitors CPU temperature and sends alerts to Uptime Kuma - - LOG_FILE="{{ log_file }}" - TEMP_THRESHOLD="{{ temp_threshold_celsius }}" - UPTIME_KUMA_URL="{{ nodito_uptime_kuma_cpu_temp_push_url }}" - - # Function to log messages - log_message() { - echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" - } - - # Function to get CPU temperature - get_cpu_temp() { - # Try different methods to get CPU 
temperature - local temp="" - - # Method 1: sensors command (most common) - if command -v sensors >/dev/null 2>&1; then - temp=$(sensors 2>/dev/null | grep -E "Core 0|Package id 0|Tdie|Tctl" | head -1 | grep -oE '[0-9]+\.[0-9]+°C' | grep -oE '[0-9]+\.[0-9]+') - fi - - # Method 2: thermal zone (fallback) - if [ -z "$temp" ] && [ -f /sys/class/thermal/thermal_zone0/temp ]; then - temp=$(cat /sys/class/thermal/thermal_zone0/temp) - temp=$(echo "scale=1; $temp/1000" | bc -l 2>/dev/null || echo "$temp") - fi - - # Method 3: acpi (fallback) - if [ -z "$temp" ] && command -v acpi >/dev/null 2>&1; then - temp=$(acpi -t 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) - fi - - echo "$temp" - } - - # Function to send alert to Uptime Kuma - send_uptime_kuma_alert() { - local temp="$1" - local message="CPU Temperature Alert: ${temp}°C (Threshold: ${TEMP_THRESHOLD}°C)" - - log_message "ALERT: $message" - - # Send push notification to Uptime Kuma - encoded_message=$(printf '%s\n' "$message" | sed 's/ /%20/g; s/°/%C2%B0/g; s/(/%28/g; s/)/%29/g; s/:/%3A/g') - curl "$UPTIME_KUMA_URL?status=up&msg=$encoded_message" - - if [ $? -eq 0 ]; then - log_message "Alert sent successfully to Uptime Kuma" - else - log_message "ERROR: Failed to send alert to Uptime Kuma" - fi - } - - # Main monitoring logic - main() { - log_message "Starting CPU temperature check" - - # Get current CPU temperature - current_temp=$(get_cpu_temp) - - if [ -z "$current_temp" ]; then - log_message "ERROR: Could not read CPU temperature" - exit 1 - fi - - log_message "Current CPU temperature: ${current_temp}°C" - - # Check if temperature exceeds threshold - if (( $(echo "$current_temp > $TEMP_THRESHOLD" | bc -l) )); then - log_message "WARNING: CPU temperature ${current_temp}°C exceeds threshold ${TEMP_THRESHOLD}°C" - send_uptime_kuma_alert "$current_temp" - else - log_message "CPU temperature is within normal range" - fi - } - - # Run main function - main - owner: root - group: root - mode: '0755' - - - name: Create systemd service for CPU temperature monitoring - copy: - dest: "/etc/systemd/system/{{ systemd_service_name }}.service" - content: | - [Unit] - Description=Nodito CPU Temperature Monitor - After=network.target - - [Service] - Type=oneshot - ExecStart={{ monitoring_script_path }} - User=root - StandardOutput=journal - StandardError=journal - - [Install] - WantedBy=multi-user.target - owner: root - group: root - mode: '0644' - - - name: Create systemd timer for CPU temperature monitoring - copy: - dest: "/etc/systemd/system/{{ systemd_service_name }}.timer" - content: | - [Unit] - Description=Run Nodito CPU Temperature Monitor every {{ temp_check_interval_minutes }} minute(s) - Requires={{ systemd_service_name }}.service - - [Timer] - OnBootSec={{ temp_check_interval_minutes }}min - OnUnitActiveSec={{ temp_check_interval_minutes }}min - Persistent=true - - [Install] - WantedBy=timers.target - owner: root - group: root - mode: '0644' - - - name: Reload systemd daemon - systemd: - daemon_reload: yes - - - name: Enable and start CPU temperature monitoring timer - systemd: - name: "{{ systemd_service_name }}.timer" - enabled: yes - state: started - - - name: Test CPU temperature monitoring script - command: "{{ monitoring_script_path }}" - register: script_test - changed_when: false - - - name: Verify script execution - assert: - that: - - script_test.rc == 0 - fail_msg: "CPU temperature monitoring script failed to execute properly" - - - name: Check if sensors are available - command: sensors - register: sensors_check - 
changed_when: false - failed_when: false - - - name: Display sensor information - debug: - msg: "Sensor information: {{ sensors_check.stdout_lines if sensors_check.rc == 0 else 'Sensors not available - using fallback methods' }}" - - - name: Show monitoring configuration - debug: - msg: - - "CPU Temperature Monitoring configured successfully" - - "Temperature threshold: {{ temp_threshold_celsius }}°C" - - "Check interval: {{ temp_check_interval_minutes }} minute(s)" - - "Uptime Kuma URL: {{ nodito_uptime_kuma_cpu_temp_push_url }}" - - "Monitoring script: {{ monitoring_script_path }}" - - "Log file: {{ log_file }}" - - "Service: {{ systemd_service_name }}.service" - - "Timer: {{ systemd_service_name }}.timer" diff --git a/ansible/services/forgejo/forgejo_vars.yml b/ansible/services/forgejo/forgejo_vars.yml index ae43cbf..6277f9a 100644 --- a/ansible/services/forgejo/forgejo_vars.yml +++ b/ansible/services/forgejo/forgejo_vars.yml @@ -15,3 +15,7 @@ forgejo_user: "git" remote_host: "{{ groups['vipy'][0] }}" remote_user: "{{ hostvars[remote_host]['ansible_user'] }}" remote_key_file: "{{ hostvars[remote_host]['ansible_ssh_private_key_file'] | default('') }}" + +# Local backup +local_backup_dir: "{{ lookup('env', 'HOME') }}/forgejo-backups" +backup_script_path: "{{ lookup('env', 'HOME') }}/.local/bin/forgejo_backup.sh" diff --git a/ansible/services/forgejo/setup_backup_forgejo_to_lapy.yml b/ansible/services/forgejo/setup_backup_forgejo_to_lapy.yml new file mode 100644 index 0000000..7e27ba6 --- /dev/null +++ b/ansible/services/forgejo/setup_backup_forgejo_to_lapy.yml @@ -0,0 +1,86 @@ +--- +- name: Configure local backup for Forgejo from remote + hosts: lapy + gather_facts: no + vars_files: + - ../../infra_vars.yml + - ./forgejo_vars.yml + vars: + remote_data_path: "{{ forgejo_data_dir }}" + remote_config_path: "{{ forgejo_config_dir }}" + forgejo_service_name: "forgejo" + gpg_recipient: "{{ hostvars['localhost']['gpg_recipient'] | default('') }}" + gpg_key_id: "{{ hostvars['localhost']['gpg_key_id'] | default('') }}" + + tasks: + - name: Debug Forgejo backup vars + debug: + msg: + - "remote_host={{ remote_host }}" + - "remote_user={{ remote_user }}" + - "remote_data_path='{{ remote_data_path }}'" + - "remote_config_path='{{ remote_config_path }}'" + - "local_backup_dir={{ local_backup_dir }}" + - "gpg_recipient={{ gpg_recipient }}" + - "gpg_key_id={{ gpg_key_id }}" + + - name: Ensure local backup directory exists + ansible.builtin.file: + path: "{{ local_backup_dir }}" + state: directory + mode: "0755" + + - name: Ensure ~/.local/bin exists + ansible.builtin.file: + path: "{{ lookup('env', 'HOME') }}/.local/bin" + state: directory + mode: "0755" + + - name: Create Forgejo backup script + ansible.builtin.copy: + dest: "{{ backup_script_path }}" + mode: "0750" + content: | + #!/bin/bash + set -euo pipefail + + if [ -z "{{ gpg_recipient }}" ]; then + echo "GPG recipient is not configured. Aborting." + exit 1 + fi + + TIMESTAMP=$(date +'%Y-%m-%d') + ENCRYPTED_BACKUP="{{ local_backup_dir }}/forgejo-backup-$TIMESTAMP.tar.gz.gpg" + + {% if remote_key_file %} + SSH_CMD="ssh -i {{ remote_key_file }} -p {{ hostvars[remote_host]['ansible_port'] | default(22) }}" + {% else %} + SSH_CMD="ssh -p {{ hostvars[remote_host]['ansible_port'] | default(22) }}" + {% endif %} + + echo "Stopping Forgejo service..." + $SSH_CMD {{ remote_user }}@{{ remote_host }} "sudo systemctl stop {{ forgejo_service_name }}" + + echo "Creating encrypted backup archive..." 
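+          # The remote side streams a gzipped tar of the data and config dirs
+          # to stdout; gpg encrypts the stream as it arrives, so the archive
+          # only ever touches the local disk in encrypted form.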
+ $SSH_CMD {{ remote_user }}@{{ remote_host }} "sudo tar -czf - {{ remote_data_path }} {{ remote_config_path }}" | \ + gpg --batch --yes --encrypt --recipient "{{ gpg_recipient }}" --output "$ENCRYPTED_BACKUP" + + echo "Starting Forgejo service..." + $SSH_CMD {{ remote_user }}@{{ remote_host }} "sudo systemctl start {{ forgejo_service_name }}" + + echo "Rotating old backups..." + find "{{ local_backup_dir }}" -name "forgejo-backup-*.tar.gz.gpg" -mtime +13 -delete + + echo "Backup completed successfully" + + - name: Ensure cronjob for Forgejo backup exists + ansible.builtin.cron: + name: "Forgejo backup" + user: "{{ lookup('env', 'USER') }}" + job: "{{ backup_script_path }}" + minute: 5 + hour: "9,12,15,18" + + - name: Run Forgejo backup script to create initial backup + ansible.builtin.command: "{{ backup_script_path }}" + diff --git a/ansible/services/headscale/deploy_headscale_playbook.yml b/ansible/services/headscale/deploy_headscale_playbook.yml index 0177ad4..e8a2b37 100644 --- a/ansible/services/headscale/deploy_headscale_playbook.yml +++ b/ansible/services/headscale/deploy_headscale_playbook.yml @@ -11,6 +11,7 @@ caddy_sites_dir: "{{ caddy_sites_dir }}" headscale_domain: "{{ headscale_subdomain }}.{{ root_domain }}" headscale_base_domain: "tailnet.{{ root_domain }}" + headscale_namespace: "{{ service_settings.headscale.namespace }}" uptime_kuma_api_url: "https://{{ subdomains.uptime_kuma }}.{{ root_domain }}" tasks: diff --git a/ansible/services/headscale/headscale_vars.yml b/ansible/services/headscale/headscale_vars.yml index d2f785a..39b70b6 100644 --- a/ansible/services/headscale/headscale_vars.yml +++ b/ansible/services/headscale/headscale_vars.yml @@ -7,12 +7,11 @@ headscale_grpc_port: 50443 # Version headscale_version: "0.26.1" -# Namespace for devices (users in headscale terminology) -headscale_namespace: counter-net - # Data directory headscale_data_dir: /var/lib/headscale +# Namespace now configured in services_config.yml under service_settings.headscale.namespace + # Remote access remote_host: "{{ groups['spacey'][0] }}" remote_user: "{{ hostvars[remote_host]['ansible_user'] }}" diff --git a/ansible/services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml b/ansible/services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml index 00ccca7..18a3b72 100644 --- a/ansible/services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml +++ b/ansible/services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml @@ -3,12 +3,17 @@ become: yes vars_files: - ../../infra_vars.yml + - ../../infra_secrets.yml - ../../services_config.yml - ./ntfy_emergency_app_vars.yml vars: ntfy_emergency_app_subdomain: "{{ subdomains.ntfy_emergency_app }}" caddy_sites_dir: "{{ caddy_sites_dir }}" ntfy_emergency_app_domain: "{{ ntfy_emergency_app_subdomain }}.{{ root_domain }}" + ntfy_service_domain: "{{ subdomains.ntfy }}.{{ root_domain }}" + ntfy_emergency_app_ntfy_url: "https://{{ ntfy_service_domain }}" + ntfy_emergency_app_ntfy_user: "{{ ntfy_username | default('') }}" + ntfy_emergency_app_ntfy_password: "{{ ntfy_password | default('') }}" tasks: - name: Create ntfy-emergency-app directory diff --git a/ansible/services/ntfy-emergency-app/ntfy_emergency_app_vars.yml b/ansible/services/ntfy-emergency-app/ntfy_emergency_app_vars.yml index ae50e20..d551c4c 100644 --- a/ansible/services/ntfy-emergency-app/ntfy_emergency_app_vars.yml +++ b/ansible/services/ntfy-emergency-app/ntfy_emergency_app_vars.yml @@ -6,9 +6,6 @@ ntfy_emergency_app_port: 3000 # ntfy configuration 
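+# The ntfy server URL and credentials are now derived in the deploy playbook
+# from services_config.yml and infra_secrets.yml; only app-specific settings remain here.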
ntfy_emergency_app_topic: "emergencia" -ntfy_emergency_app_ntfy_url: "https://ntfy.contrapeso.xyz" -ntfy_emergency_app_ntfy_user: "counterweight" -ntfy_emergency_app_ntfy_password: "superntfyme" ntfy_emergency_app_ui_message: "Leave Pablo a message, he will respond as soon as possible" # Remote access diff --git a/ansible/services/ntfy/ntfy_vars.yml b/ansible/services/ntfy/ntfy_vars.yml index 5364e44..5ebec37 100644 --- a/ansible/services/ntfy/ntfy_vars.yml +++ b/ansible/services/ntfy/ntfy_vars.yml @@ -1,2 +1,3 @@ ntfy_port: 6674 -ntfy_topic: alerts # Topic for Uptime Kuma notifications \ No newline at end of file + +# ntfy_topic now lives in services_config.yml under service_settings.ntfy.topic \ No newline at end of file diff --git a/ansible/services/ntfy/setup_ntfy_uptime_kuma_notification.yml b/ansible/services/ntfy/setup_ntfy_uptime_kuma_notification.yml index 9061d77..5ba03f1 100644 --- a/ansible/services/ntfy/setup_ntfy_uptime_kuma_notification.yml +++ b/ansible/services/ntfy/setup_ntfy_uptime_kuma_notification.yml @@ -6,10 +6,10 @@ - ../../services_config.yml - ../../infra_secrets.yml - ./ntfy_vars.yml - - ../uptime_kuma/uptime_kuma_vars.yml vars: ntfy_subdomain: "{{ subdomains.ntfy }}" + ntfy_topic: "{{ service_settings.ntfy.topic }}" uptime_kuma_subdomain: "{{ subdomains.uptime_kuma }}" ntfy_domain: "{{ ntfy_subdomain }}.{{ root_domain }}" ntfy_server_url: "https://{{ ntfy_domain }}" diff --git a/ansible/services/personal-blog/deploy_personal_blog_playbook.yml b/ansible/services/personal-blog/deploy_personal_blog_playbook.yml deleted file mode 100644 index ae951dc..0000000 --- a/ansible/services/personal-blog/deploy_personal_blog_playbook.yml +++ /dev/null @@ -1,105 +0,0 @@ -- name: Deploy personal blog static site - hosts: vipy - become: yes - vars_files: - - ../../infra_vars.yml - - ../../services_config.yml - - ./personal_blog_vars.yml - vars: - personal_blog_subdomain: "{{ subdomains.personal_blog }}" - caddy_sites_dir: "{{ caddy_sites_dir }}" - personal_blog_domain: "{{ personal_blog_subdomain }}.{{ root_domain }}" - - tasks: - - name: Install git - apt: - name: git - state: present - - - name: Create source directory for blog - file: - path: "{{ personal_blog_source_dir }}" - state: directory - owner: root - group: root - mode: '0755' - - - name: Create webroot directory - file: - path: "{{ personal_blog_webroot }}" - state: directory - owner: www-data - group: www-data - mode: '0755' - - - name: Clone blog repository with token authentication - git: - repo: "https://{{ personal_blog_git_username }}:{{ lookup('env', 'PERSONAL_BLOG_DEPLOY_TOKEN') }}@forgejo.contrapeso.xyz/counterweight/pablohere.git" - dest: "{{ personal_blog_source_dir }}" - version: master - force: yes - become_user: root - - - name: Copy static files to webroot - shell: | - rsync -av --delete {{ personal_blog_source_dir }}/{{ personal_blog_source_folder }}/ {{ personal_blog_webroot }}/ - args: - creates: "{{ personal_blog_webroot }}/index.html" - - - name: Set ownership and permissions for webroot - file: - path: "{{ personal_blog_webroot }}" - owner: www-data - group: www-data - recurse: yes - state: directory - - - name: Ensure Caddy sites-enabled directory exists - file: - path: "{{ caddy_sites_dir }}" - state: directory - owner: root - group: root - mode: '0755' - - - name: Ensure Caddyfile includes import directive for sites-enabled - lineinfile: - path: /etc/caddy/Caddyfile - line: 'import sites-enabled/*' - insertafter: EOF - state: present - backup: yes - - - name: Create Caddy static site 
configuration - copy: - dest: "{{ caddy_sites_dir }}/personal-blog.conf" - content: | - {{ personal_blog_domain }} { - root * {{ personal_blog_webroot }} - file_server - } - owner: root - group: root - mode: '0644' - - - name: Reload Caddy to apply new config - command: systemctl reload caddy - - - name: Create update script for blog - copy: - dest: /usr/local/bin/update-personal-blog.sh - content: | - #!/bin/bash - cd {{ personal_blog_source_dir }} - git pull https://{{ personal_blog_git_username }}:${PERSONAL_BLOG_DEPLOY_TOKEN}@forgejo.contrapeso.xyz/counterweight/pablohere.git master - rsync -av --delete {{ personal_blog_source_dir }}/{{ personal_blog_source_folder }}/ {{ personal_blog_webroot }}/ - chown -R www-data:www-data {{ personal_blog_webroot }} - owner: root - group: root - mode: '0755' - - - name: Add cron job to update blog every hour - cron: - name: "Update personal blog" - job: "0 * * * * PERSONAL_BLOG_DEPLOY_TOKEN={{ lookup('env', 'PERSONAL_BLOG_DEPLOY_TOKEN') }} /usr/local/bin/update-personal-blog.sh" - user: root diff --git a/ansible/services/personal-blog/personal_blog_vars.yml b/ansible/services/personal-blog/personal_blog_vars.yml deleted file mode 100644 index 69226c9..0000000 --- a/ansible/services/personal-blog/personal_blog_vars.yml +++ /dev/null @@ -1,6 +0,0 @@ -# (caddy_sites_dir and subdomain now in services_config.yml) -personal_blog_git_repo: https://forgejo.contrapeso.xyz/counterweight/pablohere.git -personal_blog_git_username: counterweight -personal_blog_source_dir: /opt/personal-blog -personal_blog_webroot: /var/www/pablohere.contrapeso.xyz -personal_blog_source_folder: public diff --git a/ansible/services_config.yml b/ansible/services_config.yml index 83ad3c4..c61a6f5 100644 --- a/ansible/services_config.yml +++ b/ansible/services_config.yml @@ -16,7 +16,6 @@ subdomains: lnbits: test-lnbits # Secondary Services (on vipy) - personal_blog: test-blog ntfy_emergency_app: test-emergency # Memos (on memos-box) @@ -24,3 +23,10 @@ subdomains: # Caddy configuration caddy_sites_dir: /etc/caddy/sites-enabled + +# Service-specific settings shared across playbooks +service_settings: + ntfy: + topic: alerts + headscale: + namespace: counter-net diff --git a/ansible/services_config.yml.example b/ansible/services_config.yml.example index fedadbf..972b685 100644 --- a/ansible/services_config.yml.example +++ b/ansible/services_config.yml.example @@ -16,7 +16,6 @@ subdomains: lnbits: lnbits # Secondary Services (on vipy) - personal_blog: blog ntfy_emergency_app: emergency # Memos (on memos-box) @@ -24,3 +23,10 @@ subdomains: # Caddy configuration caddy_sites_dir: /etc/caddy/sites-enabled + +# Service-specific settings shared across playbooks +service_settings: + ntfy: + topic: alerts + headscale: + namespace: counter-net diff --git a/human_script.md b/human_script.md index 3dffce5..a4e3959 100644 --- a/human_script.md +++ b/human_script.md @@ -258,7 +258,6 @@ All web services depend on Caddy: - Vaultwarden (vipy) - Forgejo (vipy) - LNBits (vipy) -- Personal Blog (vipy) - ntfy-emergency-app (vipy) ### Verification: @@ -629,7 +628,7 @@ ansible-playbook -i inventory.ini infra/420_system_healthcheck.yml \ -e "healthcheck_interval_seconds=30" # CPU temp with custom threshold -ansible-playbook -i inventory.ini infra/nodito/40_cpu_temp_alerts.yml \ +ansible-playbook -i inventory.ini infra/430_cpu_temp_alerts.yml \ -e "temp_threshold_celsius=75" ``` @@ -815,7 +814,47 @@ Manual verification: ## Layer 8: Secondary Services -**Status:** 🔒 Locked (Complete Layer 7 first) +**Goal:** 
Deploy auxiliary services that depend on the core stack: ntfy-emergency-app and memos.
+
+**Script:** `./scripts/setup_layer_8_secondary_services.sh`
+
+### What This Layer Does:
+- Deploys the ntfy-emergency-app container on vipy and proxies it through Caddy
+- Optionally deploys Memos on `memos-box` (skips automatically if the host is not yet in `inventory.ini`)
+
+### Prerequisites (Complete BEFORE Running):
+- ✅ Layers 0–7 complete (Caddy, ntfy, and Uptime Kuma already online)
+- ✅ `ansible/services_config.yml` reviewed so the `ntfy_emergency_app` and `memos` subdomains match your plan
+- ✅ `ansible/infra_secrets.yml` contains valid `ntfy_username` and `ntfy_password`
+- ✅ DNS A records created for the subdomains (see below)
+- ✅ If deploying Memos, ensure `memos-box` exists in `inventory.ini` and is reachable as the `counterweight` user
+
+### DNS Requirements:
+- `<emergency-subdomain>.<root-domain>` → vipy IP
+- `<memos-subdomain>.<root-domain>` → memos-box IP (skip if memos not yet provisioned)
+
+The script runs `dig` to validate DNS before deploying and will warn if records are missing or pointing elsewhere.
+
+### Run the Script:
+```bash
+source venv/bin/activate
+cd /home/counterweight/personal_infra
+./scripts/setup_layer_8_secondary_services.sh
+```
+
+You can deploy each service independently; the script asks for confirmation before running each playbook.
+
+### Post-Deployment Steps:
+- **ntfy-emergency-app:** Visit the emergency subdomain, trigger a test notification, and verify ntfy receives it
+- **Memos (if deployed):** Visit the memos subdomain, create the first admin user, and adjust settings from the UI
+
+### Verification:
+- The script checks for the presence of Caddy configs, running containers, and Memos systemd service status
+- Monitors are not created automatically for these services; see Optional Follow-Ups for alerting
+
+### Optional Follow-Ups:
+- Configure backups for any new data stores (e.g., snapshot memos data)
+- Add Uptime Kuma monitors for the new services if you want automated alerting
 
 ---
 
diff --git a/scripts/README.md b/scripts/README.md
deleted file mode 100644
index dd87a51..0000000
--- a/scripts/README.md
+++ /dev/null
@@ -1,140 +0,0 @@
-# Infrastructure Setup Scripts
-
-This directory contains automated setup scripts for each layer of the infrastructure.
-
-## Overview
-
-Each script handles a complete layer of the infrastructure setup:
-- Prompts for required variables
-- Validates prerequisites
-- Creates configuration files
-- Executes playbooks
-- Verifies completion
-
-## Usage
-
-Run scripts in order, completing one layer before moving to the next:
-
-### Layer 0: Foundation Setup
-```bash
-./scripts/setup_layer_0.sh
-```
-Sets up Ansible control node on your laptop.
-
-### Layer 1A: VPS Basic Setup
-```bash
-source venv/bin/activate
-./scripts/setup_layer_1a_vps.sh
-```
-Configures users, SSH, firewall, and fail2ban on VPS machines (vipy, watchtower, spacey).
-**Runs independently** - no Nodito required.
-
-### Layer 1B: Nodito (Proxmox) Setup
-```bash
-source venv/bin/activate
-./scripts/setup_layer_1b_nodito.sh
-```
-Configures Nodito Proxmox server: bootstrap, community repos, optional ZFS.
-**Runs independently** - no VPS required.
-
-### Layer 2: General Infrastructure Tools
-```bash
-source venv/bin/activate
-./scripts/setup_layer_2.sh
-```
-Installs rsync and docker on hosts that need them.
-- **rsync:** For backup operations (vipy, watchtower, lapy recommended) -- **docker:** For containerized services (vipy, watchtower recommended) -- Interactive: Choose which hosts get which tools - -### Layer 3: Reverse Proxy (Caddy) -```bash -source venv/bin/activate -./scripts/setup_layer_3_caddy.sh -``` -Deploys Caddy reverse proxy on VPS machines (vipy, watchtower, spacey). -- **Critical:** All web services depend on Caddy -- Automatic HTTPS with Let's Encrypt -- Opens firewall ports 80/443 -- Creates sites-enabled directory structure - -### Layer 4: Core Monitoring & Notifications -```bash -source venv/bin/activate -./scripts/setup_layer_4_monitoring.sh -``` -Deploys ntfy and Uptime Kuma on watchtower. -- **ntfy:** Notification service for alerts -- **Uptime Kuma:** Monitoring platform for all services -- **Critical:** All infrastructure monitoring depends on these -- Sets up backups (optional) -- **Post-deploy:** Create Uptime Kuma admin user and update infra_secrets.yml - -### Layer 5: VPN Infrastructure (Headscale) -```bash -source venv/bin/activate -./scripts/setup_layer_5_headscale.sh -``` -Deploys Headscale VPN mesh networking on spacey. -- **OPTIONAL** - Skip to Layer 6 if you don't need VPN -- Secure mesh networking between all machines -- Magic DNS for hostname resolution -- NAT traversal support -- Can join machines automatically or manually -- Post-deploy: Configure ACL policies for machine communication - -### Layer 6: Infrastructure Monitoring -```bash -source venv/bin/activate -./scripts/setup_layer_6_infra_monitoring.sh -``` -Deploys automated monitoring for infrastructure. -- **Requires:** Uptime Kuma credentials in infra_secrets.yml (Layer 4) -- Disk usage monitoring with auto-created push monitors -- System healthcheck (heartbeat) monitoring -- CPU temperature monitoring (nodito only) -- Interactive selection of which hosts to monitor -- All monitors organized by host groups - -### Layer 7: Core Services -```bash -source venv/bin/activate -./scripts/setup_layer_7_services.sh -``` -Deploys core services on vipy: Vaultwarden, Forgejo, LNBits. -- Password manager (Vaultwarden) with /alive endpoint -- Git server (Forgejo) with /api/healthz endpoint -- Lightning wallet (LNBits) with /api/v1/health endpoint -- **Automatic:** Creates Uptime Kuma monitors in "services" group -- **Requires:** Uptime Kuma credentials in infra_secrets.yml -- Optional: Configure backups to lapy - -### Layer 8+ -More scripts will be added as we build out each layer. - -## Important Notes - -1. **Centralized Configuration:** - - All service subdomains are configured in `ansible/services_config.yml` - - Edit this ONE file instead of multiple vars files - - Created automatically in Layer 0 - - DNS records must match the subdomains you configure - -2. **Always activate the venv first** (except for Layer 0): - ```bash - source venv/bin/activate - ``` - -3. **Complete each layer fully** before moving to the next - -4. **Scripts are idempotent** - safe to run multiple times - -5. **Review changes** before confirming actions - -## Getting Started - -1. Read `../human_script.md` for the complete guide -2. Start with Layer 0 -3. Follow the prompts -4. 
Proceed layer by layer - diff --git a/scripts/setup_layer_6_infra_monitoring.sh b/scripts/setup_layer_6_infra_monitoring.sh index 793e646..7f51bb9 100755 --- a/scripts/setup_layer_6_infra_monitoring.sh +++ b/scripts/setup_layer_6_infra_monitoring.sh @@ -374,44 +374,16 @@ deploy_cpu_temp_monitoring() { echo " • Check interval: 60 seconds" echo "" - # Check if nodito_secrets.yml exists - if [ ! -f "$ANSIBLE_DIR/infra/nodito/nodito_secrets.yml" ]; then - print_warning "nodito_secrets.yml not found" - print_info "You need to create this file with Uptime Kuma push URL" - - if confirm_action "Create nodito_secrets.yml now?"; then - # Get Uptime Kuma URL - local root_domain=$(grep "^root_domain:" "$ANSIBLE_DIR/infra_vars.yml" | awk '{print $2}' 2>/dev/null) - local uk_subdomain=$(grep "^uptime_kuma_subdomain:" "$ANSIBLE_DIR/services/uptime_kuma/uptime_kuma_vars.yml" | awk '{print $2}' 2>/dev/null || echo "uptime") - - echo -e -n "${BLUE}Enter Uptime Kuma push URL${NC} (e.g., https://${uk_subdomain}.${root_domain}/api/push/xxxxx): " - read push_url - - mkdir -p "$ANSIBLE_DIR/infra/nodito" - cat > "$ANSIBLE_DIR/infra/nodito/nodito_secrets.yml" << EOF -# Nodito Secrets -# DO NOT commit to git - -# Uptime Kuma Push URL for CPU temperature monitoring -nodito_uptime_kuma_cpu_temp_push_url: "${push_url}" -EOF - print_success "Created nodito_secrets.yml" - else - print_warning "Skipping CPU temp monitoring" - return 0 - fi - fi - echo "" if ! confirm_action "Proceed with CPU temp monitoring deployment?"; then print_warning "Skipped" return 0 fi - print_info "Running: ansible-playbook -i inventory.ini infra/nodito/40_cpu_temp_alerts.yml" + print_info "Running: ansible-playbook -i inventory.ini infra/430_cpu_temp_alerts.yml" echo "" - if ansible-playbook -i inventory.ini infra/nodito/40_cpu_temp_alerts.yml; then + if ansible-playbook -i inventory.ini infra/430_cpu_temp_alerts.yml; then print_success "CPU temperature monitoring deployed" return 0 else diff --git a/scripts/setup_layer_8_secondary_services.sh b/scripts/setup_layer_8_secondary_services.sh new file mode 100755 index 0000000..9244c3d --- /dev/null +++ b/scripts/setup_layer_8_secondary_services.sh @@ -0,0 +1,363 @@ +#!/bin/bash + +############################################################################### +# Layer 8: Secondary Services +# +# This script deploys the ntfy-emergency-app and memos services. +# Must be run after Layers 0-7 are complete. +############################################################################### + +set -e # Exit on error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Project directories +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)"
+ANSIBLE_DIR="$PROJECT_ROOT/ansible"
+
+declare -a LAYER_SUMMARY=()
+
+print_header() {
+    echo -e "\n${BLUE}========================================${NC}"
+    echo -e "${BLUE}$1${NC}"
+    echo -e "${BLUE}========================================${NC}\n"
+}
+
+print_success() {
+    echo -e "${GREEN}✓${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}✗${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}⚠${NC} $1"
+}
+
+print_info() {
+    echo -e "${BLUE}ℹ${NC} $1"
+}
+
+confirm_action() {
+    local prompt="$1"
+    local response
+
+    # Quote the command substitution so multi-word prompts are not word-split
+    read -p "$(echo -e "${YELLOW}${prompt}${NC} [y/N]: ")" response
+    [[ "$response" =~ ^[Yy]$ ]]
+}
+
+record_summary() {
+    LAYER_SUMMARY+=("$1")
+}
+
+get_hosts_from_inventory() {
+    local group="$1"
+    cd "$ANSIBLE_DIR"
+    ansible-inventory -i inventory.ini --list | \
+        python3 -c "import sys, json; data=json.load(sys.stdin); print(' '.join(data.get('$group', {}).get('hosts', [])))" 2>/dev/null || echo ""
+}
+
+get_primary_host_ip() {
+    local group="$1"
+    cd "$ANSIBLE_DIR"
+    ansible-inventory -i inventory.ini --list | \
+        python3 -c "import sys, json; data=json.load(sys.stdin); hosts=data.get('$group', {}).get('hosts', []); print(hosts[0] if hosts else '')" 2>/dev/null || echo ""
+}
+
+check_prerequisites() {
+    print_header "Verifying Prerequisites"
+
+    local errors=0
+
+    if [ -z "$VIRTUAL_ENV" ]; then
+        print_error "Virtual environment not activated"
+        echo "Run: source venv/bin/activate"
+        # Plain assignment here: ((errors++)) evaluates to 0 on first use,
+        # which set -e treats as a failure and aborts the script
+        errors=$((errors + 1))
+    else
+        print_success "Virtual environment activated"
+    fi
+
+    if ! command -v ansible &> /dev/null; then
+        print_error "Ansible not found"
+        errors=$((errors + 1))
+    else
+        print_success "Ansible found"
+    fi
+
+    if [ ! -f "$ANSIBLE_DIR/inventory.ini" ]; then
+        print_error "inventory.ini not found"
+        errors=$((errors + 1))
+    else
+        print_success "inventory.ini exists"
+    fi
+
+    if [ ! -f "$ANSIBLE_DIR/infra_vars.yml" ]; then
+        print_error "infra_vars.yml not found"
+        errors=$((errors + 1))
+    else
+        print_success "infra_vars.yml exists"
+    fi
+
+    if [ ! -f "$ANSIBLE_DIR/services_config.yml" ]; then
+        print_error "services_config.yml not found"
+        errors=$((errors + 1))
+    else
+        print_success "services_config.yml exists"
+    fi
+
+    if ! grep -q "^\[vipy\]" "$ANSIBLE_DIR/inventory.ini"; then
+        print_error "vipy not configured in inventory.ini"
+        errors=$((errors + 1))
+    else
+        print_success "vipy configured in inventory"
+    fi
+
+    if ! grep -q "^\[memos-box\]" "$ANSIBLE_DIR/inventory.ini"; then
+        print_warning "memos-box not configured in inventory.ini (memos deployment will be skipped)"
+    else
+        print_success "memos-box configured in inventory"
+    fi
+
+    if [ $errors -gt 0 ]; then
+        print_error "Prerequisites not met. Resolve the issues above and re-run the script."
+        exit 1
+    fi
+
+    print_success "Prerequisites verified"
+
+    # Display configured subdomains
+    local emergency_subdomain=$(grep "^  ntfy_emergency_app:" "$ANSIBLE_DIR/services_config.yml" | awk '{print $2}' 2>/dev/null || echo "emergency")
+    local memos_subdomain=$(grep "^  memos:" "$ANSIBLE_DIR/services_config.yml" | awk '{print $2}' 2>/dev/null || echo "memos")
+
+    print_info "Configured subdomains:"
+    echo "  • ntfy_emergency_app: $emergency_subdomain"
+    echo "  • memos: $memos_subdomain"
+    echo ""
+}
+
+check_dns_configuration() {
+    print_header "Validating DNS Configuration"
+
+    if ! command -v dig &> /dev/null; then
+        print_warning "dig command not found. Skipping DNS validation."
+        print_info "Install dnsutils/bind-tools to enable DNS validation."
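+        # Package name varies by distro; pick what matches your hosts:
+        #   sudo apt install dnsutils     # Debian/Ubuntu
+        #   sudo dnf install bind-utils   # Fedora/RHEL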
+ return 0 + fi + + cd "$ANSIBLE_DIR" + + local root_domain + root_domain=$(grep "^root_domain:" "$ANSIBLE_DIR/infra_vars.yml" | awk '{print $2}' 2>/dev/null) + + if [ -z "$root_domain" ]; then + print_error "Could not determine root_domain from infra_vars.yml" + return 1 + fi + + local emergency_subdomain=$(grep "^ ntfy_emergency_app:" "$ANSIBLE_DIR/services_config.yml" | awk '{print $2}' 2>/dev/null || echo "emergency") + local memos_subdomain=$(grep "^ memos:" "$ANSIBLE_DIR/services_config.yml" | awk '{print $2}' 2>/dev/null || echo "memos") + + local vipy_ip + vipy_ip=$(get_primary_host_ip "vipy") + + if [ -z "$vipy_ip" ]; then + print_error "Unable to determine vipy IP from inventory" + return 1 + fi + + local memos_ip="" + if grep -q "^\[memos-box\]" "$ANSIBLE_DIR/inventory.ini"; then + memos_ip=$(get_primary_host_ip "memos-box") + fi + + local dns_ok=true + + local emergency_fqdn="${emergency_subdomain}.${root_domain}" + local memos_fqdn="${memos_subdomain}.${root_domain}" + + print_info "Expected DNS:" + echo " • $emergency_fqdn → $vipy_ip" + if [ -n "$memos_ip" ]; then + echo " • $memos_fqdn → $memos_ip" + else + echo " • $memos_fqdn → (skipped - memos-box not in inventory)" + fi + echo "" + + local resolved + + print_info "Checking $emergency_fqdn..." + resolved=$(dig +short "$emergency_fqdn" | head -n1) + if [ "$resolved" = "$vipy_ip" ]; then + print_success "$emergency_fqdn resolves to $resolved" + elif [ -n "$resolved" ]; then + print_error "$emergency_fqdn resolves to $resolved (expected $vipy_ip)" + dns_ok=false + else + print_error "$emergency_fqdn does not resolve" + dns_ok=false + fi + + if [ -n "$memos_ip" ]; then + print_info "Checking $memos_fqdn..." + resolved=$(dig +short "$memos_fqdn" | head -n1) + if [ "$resolved" = "$memos_ip" ]; then + print_success "$memos_fqdn resolves to $resolved" + elif [ -n "$resolved" ]; then + print_error "$memos_fqdn resolves to $resolved (expected $memos_ip)" + dns_ok=false + else + print_error "$memos_fqdn does not resolve" + dns_ok=false + fi + fi + + echo "" + + if [ "$dns_ok" = false ]; then + print_error "DNS validation failed." + print_info "Update DNS records as shown above and wait for propagation." + echo "" + if ! confirm_action "Continue anyway? (SSL certificates will fail without correct DNS)"; then + exit 1 + fi + else + print_success "DNS validation passed" + fi +} + +deploy_ntfy_emergency_app() { + print_header "Deploying ntfy-emergency-app" + + cd "$ANSIBLE_DIR" + + print_info "This deploys the emergency notification interface pointing at ntfy." + echo "" + + if ! confirm_action "Deploy / update the ntfy-emergency-app?"; then + print_warning "Skipped ntfy-emergency-app deployment" + record_summary "${YELLOW}• ntfy-emergency-app${NC}: skipped" + return 0 + fi + + print_info "Running: ansible-playbook -i inventory.ini services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml" + echo "" + + if ansible-playbook -i inventory.ini services/ntfy-emergency-app/deploy_ntfy_emergency_app_playbook.yml; then + print_success "ntfy-emergency-app deployed successfully" + record_summary "${GREEN}• ntfy-emergency-app${NC}: deployed" + else + print_error "ntfy-emergency-app deployment failed" + record_summary "${RED}• ntfy-emergency-app${NC}: failed" + fi +} + +deploy_memos() { + print_header "Deploying Memos" + + if ! grep -q "^\[memos-box\]" "$ANSIBLE_DIR/inventory.ini"; then + print_warning "memos-box not in inventory. Skipping memos deployment." 
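+
+        # To deploy memos later, add a memos-box group to ansible/inventory.ini, e.g.
+        # (hypothetical IP; reuse the ansible_user/key settings from your other hosts):
+        #   [memos-box]
+        #   203.0.113.20 ansible_user=counterweight ansible_ssh_private_key_file=~/.ssh/id_ed25519
+        # then re-run this script.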
+ record_summary "${YELLOW}• memos${NC}: skipped (memos-box missing)" + return 0 + fi + + cd "$ANSIBLE_DIR" + + if ! confirm_action "Deploy / update memos on memos-box?"; then + print_warning "Skipped memos deployment" + record_summary "${YELLOW}• memos${NC}: skipped" + return 0 + fi + + print_info "Running: ansible-playbook -i inventory.ini services/memos/deploy_memos_playbook.yml" + echo "" + + if ansible-playbook -i inventory.ini services/memos/deploy_memos_playbook.yml; then + print_success "Memos deployed successfully" + record_summary "${GREEN}• memos${NC}: deployed" + else + print_error "Memos deployment failed" + record_summary "${RED}• memos${NC}: failed" + fi +} + +verify_services() { + print_header "Verifying Deployments" + + cd "$ANSIBLE_DIR" + + local ssh_key=$(grep "ansible_ssh_private_key_file" "$ANSIBLE_DIR/inventory.ini" | head -n1 | sed 's/.*ansible_ssh_private_key_file=\([^ ]*\).*/\1/') + ssh_key="${ssh_key/#\~/$HOME}" + + local vipy_host + vipy_host=$(get_hosts_from_inventory "vipy") + + if [ -n "$vipy_host" ]; then + print_info "Checking services on vipy ($vipy_host)..." + + if timeout 5 ssh -i "$ssh_key" -o StrictHostKeyChecking=no -o BatchMode=yes counterweight@$vipy_host "docker ps | grep ntfy-emergency-app" &>/dev/null; then + print_success "ntfy-emergency-app container running" + else + print_warning "ntfy-emergency-app container not running" + fi + + echo "" + fi + + if grep -q "^\[memos-box\]" "$ANSIBLE_DIR/inventory.ini"; then + local memos_host + memos_host=$(get_hosts_from_inventory "memos-box") + + if [ -n "$memos_host" ]; then + print_info "Checking memos on memos-box ($memos_host)..." + if timeout 5 ssh -i "$ssh_key" -o StrictHostKeyChecking=no -o BatchMode=yes counterweight@$memos_host "systemctl is-active memos" &>/dev/null; then + print_success "memos systemd service running" + else + print_warning "memos systemd service not running" + fi + echo "" + fi + fi +} + +print_summary() { + print_header "Layer 8 Summary" + + if [ ${#LAYER_SUMMARY[@]} -eq 0 ]; then + print_info "No actions were performed." + return + fi + + for entry in "${LAYER_SUMMARY[@]}"; do + echo -e "$entry" + done + + echo "" + print_info "Next steps:" + echo " • Visit each service's subdomain to complete any manual setup." + echo " • Configure backups for new services if applicable." + echo " • Update Uptime Kuma monitors if additional endpoints are desired." 
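+
+    # Manual smoke test for the alerting path (a sketch; assumes the ntfy topic
+    # "alerts" from service_settings in services_config.yml and the credentials
+    # stored in infra_secrets.yml; substitute your real subdomain/domain):
+    #   curl -u "<ntfy_username>:<ntfy_password>" -d "layer 8 test" \
+    #     "https://<ntfy_subdomain>.<root_domain>/alerts"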
+} + +main() { + print_header "Layer 8: Secondary Services" + + check_prerequisites + check_dns_configuration + + deploy_ntfy_emergency_app + deploy_memos + + verify_services + print_summary +} + +main "$@" + diff --git a/tofu/nodito/README.md b/tofu/nodito/README.md index a6762a5..ffb24c1 100644 --- a/tofu/nodito/README.md +++ b/tofu/nodito/README.md @@ -45,6 +45,14 @@ vms = { memory_mb = 2048 disk_size_gb = 20 ipconfig0 = "ip=dhcp" # or "ip=192.168.1.50/24,gw=192.168.1.1" + data_disks = [ + { + size_gb = 50 + # optional overrides: + # storage = "proxmox-tank-1" + # slot = "scsi2" + } + ] } } ``` diff --git a/tofu/nodito/main.tf b/tofu/nodito/main.tf index cc7a75d..6ad5d15 100644 --- a/tofu/nodito/main.tf +++ b/tofu/nodito/main.tf @@ -59,6 +59,16 @@ resource "proxmox_vm_qemu" "vm" { # optional flags like iothread/ssd/discard differ by provider versions; keep minimal } + dynamic "disk" { + for_each = try(each.value.data_disks, []) + content { + slot = try(disk.value.slot, format("scsi%s", tonumber(disk.key) + 1)) + type = "disk" + storage = try(disk.value.storage, var.zfs_storage_name) + size = "${disk.value.size_gb}G" + } + } + # Cloud-init CD-ROM so ipconfig0/sshkeys apply disk { slot = "ide2" diff --git a/tofu/nodito/terraform.tfvars.example b/tofu/nodito/terraform.tfvars.example index cc88b3f..b4149c8 100644 --- a/tofu/nodito/terraform.tfvars.example +++ b/tofu/nodito/terraform.tfvars.example @@ -20,6 +20,13 @@ vms = { memory_mb = 2048 disk_size_gb = 20 ipconfig0 = "ip=dhcp" + data_disks = [ + { + size_gb = 50 + # optional: storage = "proxmox-tank-1" + # optional: slot = "scsi2" + } + ] } db1 = { diff --git a/tofu/nodito/variables.tf b/tofu/nodito/variables.tf index 30a1418..3f16e75 100644 --- a/tofu/nodito/variables.tf +++ b/tofu/nodito/variables.tf @@ -55,6 +55,11 @@ variable "vms" { disk_size_gb = number vlan_tag = optional(number) ipconfig0 = optional(string) # e.g. "ip=dhcp" or "ip=192.168.1.50/24,gw=192.168.1.1" + data_disks = optional(list(object({ + size_gb = number + storage = optional(string) + slot = optional(string) + })), []) })) default = {} }
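For reference, a minimal `data_disks` walkthrough under stated assumptions: the VM is the `db1` entry from `terraform.tfvars.example`, the plan line is illustrative rather than exact provider output, and the in-guest device name depends on the disks already attached.

```bash
# Hypothetical workflow: give db1 a 50 GB data disk (tfvars as in the example
# above), then let OpenTofu create/update the VM. Names and IP are placeholders.
cd tofu/nodito
tofu init
tofu plan -var-file=terraform.tfvars   # expect a change on proxmox_vm_qemu.vm["db1"] adding a scsi1 disk
tofu apply -var-file=terraform.tfvars

# The extra disk is attached raw; inside the guest it usually appears as a
# second block device (e.g. /dev/sdb). Partition, format, and mount it yourself:
ssh counterweight@<db1-ip> 'lsblk'
```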