personal_infra/scripts/setup_layer_5_headscale.sh
2025-11-06 23:09:44 +01:00

494 lines
16 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
###############################################################################
# Layer 5: VPN Infrastructure (Headscale)
#
# This script deploys Headscale and optionally joins machines to the mesh.
# Must be run after Layers 0, 1A, and 3 are complete.
# THIS LAYER IS OPTIONAL - skip to Layer 6 if you don't need VPN.
#
# The script is interactive: it prompts for confirmation before each step.
###############################################################################
# Abort on the first unhandled non-zero exit status. Note that commands used
# as `if`/`&&`/`||` conditions are exempt, and that a bare `(( expr ))` whose
# value is 0 counts as a failure under this mode.
set -e # Exit on error
# Colors for output. These are literal backslash sequences (single-quoted);
# they only become terminal escapes when printed through `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Project root directory
# SCRIPT_DIR is the absolute directory containing this script; PROJECT_ROOT is
# its parent, and ANSIBLE_DIR is the "ansible" directory under the project root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
ANSIBLE_DIR="$PROJECT_ROOT/ansible"
###############################################################################
# Helper Functions
###############################################################################
# Print a section banner: a blank line, the title framed by two rules of '='
# characters (all in blue), and a trailing blank line.
# Arguments: $1 - banner title
print_header() {
  local rule='========================================'
  # %b expands backslash escapes the way `echo -e` does; the format string is
  # reused once per argument, so this emits the three banner lines in order.
  printf '%b\n' \
    "\n${BLUE}${rule}${NC}" \
    "${BLUE}$1${NC}" \
    "${BLUE}${rule}${NC}\n"
}
# Print a success line: a green check mark followed by the message.
# The marker keeps the severity visible even where colors are not rendered.
# Arguments: $1 - message text
print_success() {
  echo -e "${GREEN}✓${NC} $1"
}
# Print an error line: a red cross followed by the message.
# The marker keeps the severity visible even where colors are not rendered.
# Arguments: $1 - message text
print_error() {
  echo -e "${RED}✗${NC} $1"
}
# Print a warning line: a yellow warning sign followed by the message.
# The marker keeps the severity visible even where colors are not rendered.
# Arguments: $1 - message text
print_warning() {
  echo -e "${YELLOW}⚠${NC} $1"
}
# Print an informational line: a blue info sign followed by the message.
# The marker keeps the severity visible even where colors are not rendered.
# Arguments: $1 - message text
print_info() {
  echo -e "${BLUE}ℹ${NC} $1"
}
# Ask a yes/no question and report the answer through the exit status.
# Arguments: $1 - question text (shown in yellow, followed by "[y/N]:")
# Returns:   0 only when the reply is exactly "y" or "Y"; non-zero otherwise
#            (including an empty reply or end of input).
confirm_action() {
  local prompt="$1"
  local response=""
  local rendered
  # Build the prompt from a single quoted string: the text contains glob
  # characters ("[y/N]", "?") that an unquoted expansion could replace with
  # matching file names from the current directory.
  rendered="$(printf '%b' "${YELLOW}${prompt}${NC} [y/N]:")"
  # -r keeps backslashes literal, so a reply such as 'y\' is not treated as a
  # line continuation; a failed read (EOF) simply leaves the reply empty.
  read -r -p "$rendered" response || response=""
  [[ "$response" =~ ^[Yy]$ ]]
}
###############################################################################
# Verification Functions
###############################################################################
# Verify that the environment is ready for this layer.
# Checks, in order: an activated Python virtual environment ($VIRTUAL_ENV),
# the `ansible` command, the presence of inventory.ini under $ANSIBLE_DIR, and
# a [spacey] group header inside that inventory.
# Globals: ANSIBLE_DIR (read), VIRTUAL_ENV (read)
# Exits:   with status 1 after reporting every failed check; returns normally
#          when all checks pass.
check_prerequisites() {
  print_header "Verifying Prerequisites"
  local errors=0
  local inventory="$ANSIBLE_DIR/inventory.ini"

  # The counter is bumped with an assignment rather than ((errors++)): the
  # arithmetic command returns status 1 when its value is 0, which under
  # `set -e` would kill the script on the very first failed check, before the
  # remaining checks and the final summary are printed.
  if [[ -z "${VIRTUAL_ENV:-}" ]]; then
    print_error "Virtual environment not activated"
    echo "Run: source venv/bin/activate"
    errors=$((errors + 1))
  else
    print_success "Virtual environment activated"
  fi

  if ! command -v ansible &> /dev/null; then
    print_error "Ansible not found"
    errors=$((errors + 1))
  else
    print_success "Ansible found"
  fi

  if [[ ! -f "$inventory" ]]; then
    print_error "inventory.ini not found"
    errors=$((errors + 1))
  else
    print_success "inventory.ini exists"
  fi

  # Check if spacey is configured. grep's own "No such file" message is
  # silenced because a missing inventory has already been reported above.
  if ! grep -q "^\[spacey\]" "$inventory" 2> /dev/null; then
    print_error "spacey not configured in inventory.ini"
    print_info "Layer 5 requires spacey VPS for Headscale server"
    errors=$((errors + 1))
  else
    print_success "spacey configured in inventory"
  fi

  if [[ "$errors" -gt 0 ]]; then
    print_error "Prerequisites not met"
    exit 1
  fi
  print_success "Prerequisites verified"
}
# Print the hosts of one inventory group as a single space-separated line.
# Globals:   ANSIBLE_DIR (read); the working directory is changed to it, so
#            call this inside $(...) to keep the caller's directory intact.
# Arguments: $1 - inventory group name
# Outputs:   the host list on stdout, or an empty line when the group is
#            missing or the inventory cannot be read/parsed.
# Returns:   0 in all of the cases above (failures degrade to empty output).
get_hosts_from_inventory() {
  local group="$1"
  cd "$ANSIBLE_DIR" || { echo ""; return 0; }
  # The group name is handed to Python as an argument (sys.argv[1]) instead of
  # being pasted into the program text, so quotes or other special characters
  # in the name cannot alter the code that runs.
  ansible-inventory -i inventory.ini --list | \
    python3 -c 'import sys, json; data = json.load(sys.stdin); print(" ".join(data.get(sys.argv[1], {}).get("hosts", [])))' "$group" 2>/dev/null || echo ""
}
# Confirm that services_config.yml exists and show the Headscale subdomain
# that will be used.
# Globals: ANSIBLE_DIR (read)
# Exits:   with status 1 when services_config.yml is missing.
check_vars_files() {
  print_header "Checking Configuration Files"
  local config="$ANSIBLE_DIR/services_config.yml"

  # Check services_config.yml
  if [[ ! -f "$config" ]]; then
    print_error "services_config.yml not found"
    print_info "This file should have been created in Layer 0"
    exit 1
  fi
  print_success "services_config.yml exists"

  # Show configured subdomain: second field of the first indented
  # "headscale:" line, whatever its indentation depth.
  local hs_sub
  hs_sub=$(awk '/^[[:space:]]+headscale:/ {print $2; exit}' "$config" 2>/dev/null) || true
  # Apply the default explicitly. A `pipeline || echo default` fallback never
  # fires here because the pipeline's status is awk's, which is 0 even when
  # nothing matched.
  hs_sub="${hs_sub:-headscale}"
  print_info "Configured subdomain: headscale: $hs_sub"
  echo ""
}
# Check that the Headscale hostname resolves to the spacey server.
# The expected address is the first host of the [spacey] inventory group; the
# hostname is "<headscale subdomain>.<root_domain>", read from
# services_config.yml and infra_vars.yml. When `dig` is unavailable the check
# is skipped with a warning.
# NOTE(review): the comparison assumes the spacey inventory entry is an IP
# address rather than a hostname - confirm against the inventory.
# Globals: ANSIBLE_DIR (read); the working directory is changed to it.
# Returns: 1 when the spacey host or root_domain cannot be determined.
# Exits:   with status 1 when DNS does not match and the user declines to
#          continue.
check_dns_configuration() {
  print_header "Validating DNS Configuration"
  cd "$ANSIBLE_DIR"

  # Get spacey IP
  local spacey_ip
  spacey_ip=$(ansible-inventory -i inventory.ini --list | python3 -c "import sys, json; data=json.load(sys.stdin); hosts=data.get('spacey', {}).get('hosts', []); print(hosts[0] if hosts else '')" 2>/dev/null) || true
  if [[ -z "$spacey_ip" ]]; then
    print_error "Could not determine spacey IP from inventory"
    return 1
  fi
  print_info "Spacey IP: $spacey_ip"
  echo ""

  # Get domain from infra_vars.yml
  local root_domain
  root_domain=$(grep "^root_domain:" "$ANSIBLE_DIR/infra_vars.yml" | awk '{print $2}' 2>/dev/null) || true
  if [[ -z "$root_domain" ]]; then
    print_error "Could not determine root_domain from infra_vars.yml"
    return 1
  fi

  # Get subdomain from centralized config: second field of the first indented
  # "headscale:" line. The default is applied explicitly afterwards because a
  # `pipeline || echo default` fallback never fires (awk exits 0 on no match),
  # which would otherwise produce a hostname like ".example.com".
  local config="$ANSIBLE_DIR/services_config.yml"
  local headscale_subdomain=""
  if [[ -f "$config" ]]; then
    headscale_subdomain=$(awk '/^[[:space:]]+headscale:/ {print $2; exit}' "$config" 2>/dev/null) || true
  fi
  headscale_subdomain="${headscale_subdomain:-headscale}"
  local headscale_fqdn="${headscale_subdomain}.${root_domain}"

  print_info "Checking DNS record..."
  echo ""

  # Check Headscale DNS
  print_info "Checking $headscale_fqdn..."
  if ! command -v dig &> /dev/null; then
    print_warning "dig command not found, skipping DNS validation"
    print_info "Install dnsutils/bind-tools to enable DNS validation"
  else
    local resolved
    # Only the first line of dig's short answer is compared.
    resolved=$(dig +short "$headscale_fqdn" | head -n1) || true
    if [[ "$resolved" == "$spacey_ip" ]]; then
      print_success "$headscale_fqdn → $resolved"
    else
      if [[ -n "$resolved" ]]; then
        print_error "$headscale_fqdn → $resolved (expected $spacey_ip)"
        print_warning "DNS changes can take time to propagate (up to 24-48 hours)"
      else
        print_error "$headscale_fqdn does not resolve"
        print_warning "DNS changes can take time to propagate"
      fi
      echo ""
      # Both mismatch cases share the same escape hatch.
      if ! confirm_action "Continue anyway? (SSL certificates will fail without proper DNS)"; then
        exit 1
      fi
    fi
  fi

  echo ""
  print_success "DNS validation complete"
}
###############################################################################
# Headscale Deployment
###############################################################################
# Describe the Headscale deployment, ask for confirmation and, if granted,
# run the deployment playbook from $ANSIBLE_DIR.
# Globals: ANSIBLE_DIR (read); the working directory is changed to it.
# Returns: 0 when the playbook succeeds; 1 when the user declines or the
#          playbook fails.
deploy_headscale() {
  local playbook="services/headscale/deploy_headscale_playbook.yml"

  print_header "Deploying Headscale Server"
  cd "$ANSIBLE_DIR"

  print_info "This will:"
  # One printf call emits the whole bullet list plus the trailing blank line.
  printf '%s\n' \
    " • Install Headscale on spacey" \
    " • Configure with deny-all ACL policy (you customize later)" \
    " • Create namespace for your network" \
    " • Set up Caddy reverse proxy" \
    " • Configure embedded DERP server" \
    ""
  print_warning "After deployment, you MUST configure ACL policies for machines to communicate"
  echo ""

  if confirm_action "Proceed with Headscale deployment?"; then
    print_info "Running: ansible-playbook -i inventory.ini ${playbook}"
    echo ""
    if ! ansible-playbook -i inventory.ini "$playbook"; then
      print_error "Headscale deployment failed"
      return 1
    fi
    print_success "Headscale deployment complete"
    return 0
  fi

  # Declining is reported to the caller the same way as a failure.
  print_warning "Skipped Headscale deployment"
  return 1
}
###############################################################################
# Join Machines to Mesh
###############################################################################
# Interactively choose which inventory groups to join to the Headscale mesh
# and run the join playbook limited to that selection.
# Globals: ANSIBLE_DIR (read); the working directory is changed to it.
# Input:   reads the menu choice (and, for option 3, the group list) from
#          stdin.
# Returns: always 0 - skipping, an invalid choice or a failed join are all
#          reported but do not abort the surrounding setup.
join_machines_to_mesh() {
  local playbook="infra/920_join_headscale_mesh.yml"

  print_header "Join Machines to Mesh (Optional)"
  cd "$ANSIBLE_DIR"
  print_info "This will install Tailscale client and join machines to your Headscale mesh"
  echo ""

  # Show available hosts
  echo "Available hosts to join:"
  local group hosts
  for group in vipy watchtower nodito lapy; do
    hosts=$(get_hosts_from_inventory "$group") || true
    if [[ -n "$hosts" ]]; then
      echo " [$group]: $hosts"
    fi
  done
  echo ""

  print_info "Join options:"
  echo " 1. Join recommended machines (vipy, watchtower, nodito)"
  echo " 2. Join all machines"
  echo " 3. Custom selection (specify groups)"
  echo " 4. Skip - join machines later manually"
  echo ""

  local option=""
  echo -e -n "${BLUE}Choose option${NC} [1-4]: "
  # A failed read (EOF) must not abort the script under `set -e`; it falls
  # through to the "Invalid option" branch instead.
  read -r option || option=""

  local limit_hosts=""
  case "$option" in
    1)
      limit_hosts="vipy,watchtower,nodito"
      print_info "Joining: vipy, watchtower, nodito"
      ;;
    2)
      limit_hosts="all"
      print_info "Joining: all hosts"
      ;;
    3)
      echo -e -n "${BLUE}Enter groups (comma-separated, e.g., vipy,watchtower)${NC}: "
      read -r limit_hosts || limit_hosts=""
      # Refuse an empty/blank selection: passing --limit "" would not name
      # any group (presumably Ansible then applies no restriction at all -
      # confirm against the ansible-playbook documentation).
      if [[ -z "${limit_hosts//[[:space:]]/}" ]]; then
        print_error "No groups specified"
        return 0
      fi
      print_info "Joining: $limit_hosts"
      ;;
    4)
      print_warning "Skipping machine join - you can join manually later"
      print_info "To join manually:"
      echo " ansible-playbook -i inventory.ini infra/920_join_headscale_mesh.yml --limit <host>"
      return 0
      ;;
    *)
      print_error "Invalid option"
      return 0
      ;;
  esac
  echo ""

  if ! confirm_action "Proceed with joining machines?"; then
    print_warning "Skipped joining machines"
    return 0
  fi

  print_info "Running: ansible-playbook -i inventory.ini ${playbook} --limit $limit_hosts"
  echo ""
  if ansible-playbook -i inventory.ini "$playbook" --limit "$limit_hosts"; then
    print_success "Machines joined to mesh"
  else
    print_error "Failed to join some machines"
    print_info "You can retry or join manually later"
  fi
  return 0
}
###############################################################################
# Backup Configuration
###############################################################################
# Optionally configure automated Headscale backups by running the backup
# playbook, after confirming with the user and probing for rsync on spacey.
# Globals: ANSIBLE_DIR (read); the working directory is changed to it.
# Returns: always 0 - declining, a missing rsync and a failed playbook are
#          reported but never abort the surrounding setup.
setup_headscale_backup() {
  local backup_playbook="services/headscale/setup_backup_headscale_to_lapy.yml"

  print_header "Setting Up Headscale Backup (Optional)"
  cd "$ANSIBLE_DIR"
  print_info "This will set up automated backups to lapy"
  echo ""

  if confirm_action "Set up Headscale backup to lapy?"; then
    :
  else
    print_warning "Skipped backup setup"
    return 0
  fi

  # Check if rsync is available
  # NOTE(review): the message mentions lapy as well, but only spacey is
  # actually probed below.
  print_info "Verifying rsync is installed on spacey and lapy..."
  if ansible spacey -i inventory.ini -m shell -a "command -v rsync" &>/dev/null; then
    :
  else
    print_error "rsync not found on spacey"
    print_info "Run Layer 2 to install rsync"
    print_warning "Backup setup skipped - rsync not available"
    return 0
  fi

  print_info "Running: ansible-playbook -i inventory.ini ${backup_playbook}"
  echo ""
  if ! ansible-playbook -i inventory.ini "$backup_playbook"; then
    print_error "Backup setup failed"
    return 0
  fi
  print_success "Headscale backup configured"
  print_info "Backups will run periodically via cron"
  return 0
}
###############################################################################
# Post-Deployment Verification
###############################################################################
# Probe the spacey server over SSH and report on the Headscale installation:
# service state, Caddy site config, ACL policy file and the node list.
# Every probe is best-effort (5 second timeout, non-interactive SSH); failures
# are printed as warnings and never abort the script.
# Globals: ANSIBLE_DIR (read); the working directory is changed to it.
#          HOME (read) to expand a leading "~" in the key path.
# Returns: 0, including when the spacey host cannot be determined.
verify_deployment() {
  print_header "Verifying Headscale Deployment"
  cd "$ANSIBLE_DIR"

  # First ansible_ssh_private_key_file=... value found in the inventory.
  local ssh_key
  ssh_key=$(grep "ansible_ssh_private_key_file" "$ANSIBLE_DIR/inventory.ini" | head -n1 | sed 's/.*ansible_ssh_private_key_file=\([^ ]*\).*/\1/') || true
  ssh_key="${ssh_key/#\~/$HOME}"

  local spacey_hosts spacey_host
  spacey_hosts=$(get_hosts_from_inventory "spacey") || true
  # The helper returns a space-separated list; probe the first host only so a
  # multi-host group cannot spill extra words into the ssh command line.
  spacey_host="${spacey_hosts%% *}"
  if [[ -z "$spacey_host" ]]; then
    print_error "Could not determine spacey host"
    return 0
  fi

  # Build the common ssh invocation once. -i is added only when a key was
  # found, so ssh is never handed an empty identity path.
  local -a ssh_cmd=(timeout 5 ssh)
  if [[ -n "$ssh_key" ]]; then
    ssh_cmd+=(-i "$ssh_key")
  fi
  ssh_cmd+=(-o StrictHostKeyChecking=no -o BatchMode=yes "counterweight@${spacey_host}")

  print_info "Checking Headscale on spacey ($spacey_host)..."
  echo ""

  # Check Headscale service
  if "${ssh_cmd[@]}" "systemctl is-active headscale" &>/dev/null; then
    print_success "Headscale service running"
  else
    print_warning "Headscale service not running"
  fi

  # Check Caddy config
  if "${ssh_cmd[@]}" "test -f /etc/caddy/sites-enabled/headscale.conf" &>/dev/null; then
    print_success "Headscale Caddy config exists"
  else
    print_warning "Headscale Caddy config not found"
  fi

  # Check ACL file
  if "${ssh_cmd[@]}" "test -f /etc/headscale/acl.json" &>/dev/null; then
    print_success "ACL policy file exists"
  else
    print_warning "ACL policy file not found"
  fi

  # List nodes
  print_info "Attempting to list connected nodes..."
  local nodes_output
  nodes_output=$("${ssh_cmd[@]}" "sudo headscale nodes list" 2>/dev/null) || nodes_output=""
  if [[ -n "$nodes_output" ]]; then
    echo "$nodes_output"
  else
    print_warning "Could not list nodes (this is normal if no machines joined yet)"
  fi
  echo ""
}
###############################################################################
# Summary Functions
###############################################################################
# Print the closing report for this layer: what was configured, the manual
# follow-up steps, what Headscale provides, and where to go next.
# Takes no arguments and only writes to stdout.
print_summary() {
  print_header "Layer 5 Setup Complete! 🎉"

  # Each printf call below emits one line per argument; "" is a blank line.
  printf '%s\n' "Summary of what was configured:" ""
  print_success "Headscale VPN server deployed on spacey"
  print_success "Caddy reverse proxy configured"
  print_success "Namespace created for your network"
  echo ""

  print_warning "CRITICAL POST-DEPLOYMENT STEPS:"
  printf '%s\n' \
    "" \
    "1. Configure ACL Policies (REQUIRED for machines to communicate):" \
    " • SSH to spacey: ssh counterweight@<spacey-ip>" \
    " • Edit ACL: sudo nano /etc/headscale/acl.json" \
    " • Add rules to allow communication" \
    " • Restart: sudo systemctl restart headscale" \
    "" \
    "2. Verify machines joined (if you selected that option):" \
    " • SSH to spacey: ssh counterweight@<spacey-ip>" \
    " • List nodes: sudo headscale nodes list" \
    "" \
    "3. Join additional machines (mobile, desktop):" \
    " • Generate key: sudo headscale preauthkeys create --user <namespace> --reusable" \
    " • On device: tailscale up --login-server https://<headscale-domain> --authkey <key>" \
    ""

  print_info "What Headscale enables:"
  printf '%s\n' \
    " • Secure mesh networking between all machines" \
    " • Magic DNS - access machines by hostname" \
    " • NAT traversal - works behind firewalls" \
    " • Self-hosted Tailscale alternative" \
    ""

  print_info "Next steps:"
  printf '%s\n' \
    " 1. Configure ACL policies on spacey" \
    " 2. Verify nodes are connected" \
    " 3. Proceed to Layer 6: ./scripts/setup_layer_6_infra_monitoring.sh" \
    ""
}
###############################################################################
# Main Execution
###############################################################################
# Drive the whole layer: introduce it, ask whether to proceed, run the
# prerequisite/configuration/DNS checks, deploy Headscale, then run the
# optional join, backup and verification steps and print the summary.
# Exits: 0 when the user declines at the first prompt; 1 when
#        deploy_headscale reports failure (which includes declining there).
main() {
  clear
  print_header "🔐 Layer 5: VPN Infrastructure (Headscale)"
  printf '%s\n' "This script will deploy Headscale for secure mesh networking." ""
  print_warning "THIS LAYER IS OPTIONAL"
  print_info "Skip to Layer 6 if you don't need VPN mesh networking"
  echo ""

  confirm_action "Continue with Layer 5 setup?" || {
    echo "Setup skipped - proceeding to Layer 6 is fine!"
    exit 0
  }

  check_prerequisites
  check_vars_files
  check_dns_configuration

  # Deploy Headscale; nothing after this point runs unless it succeeded.
  if ! deploy_headscale; then
    print_error "Headscale deployment failed"
    exit 1
  fi

  # Follow-up steps, each preceded by a blank separator line.
  local step
  for step in join_machines_to_mesh setup_headscale_backup verify_deployment; do
    echo ""
    "$step"
  done
  print_summary
}
# Entry point: run main, forwarding all command-line arguments unchanged.
main "$@"