ansible docker-stack.yml

Docker Stack Deployment

Deploy and configure Docker with common containers across multiple hosts

yaml
---
# Ansible Playbook: Deploy Docker Stack
# Usage: ansible-playbook -i inventory docker-stack.yml

- name: Deploy Docker and common containers
  hosts: docker_hosts
  become: yes
  vars:
    docker_compose_version: "2.24.0"
    containers:
      - name: portainer
        image: portainer/portainer-ce:latest
        ports: ["9443:9443"]
        volumes: ["/var/run/docker.sock:/var/run/docker.sock", "portainer_data:/data"]
      - name: watchtower
        image: containrrr/watchtower:latest
        volumes: ["/var/run/docker.sock:/var/run/docker.sock"]
        environment:
          WATCHTOWER_CLEANUP: "true"
          WATCHTOWER_SCHEDULE: "0 0 4 * * *"

  tasks:
    - name: Install Docker dependencies
      apt:
        name:
          - apt-transport-https
          - ca-certificates
          - curl
          - gnupg
          - lsb-release
          - python3-docker  # Docker SDK for Python, required by community.docker modules
        state: present
        update_cache: yes

    - name: Add Docker GPG key
      apt_key:
        url: https://download.docker.com/linux/ubuntu/gpg
        state: present

    - name: Add Docker repository
      apt_repository:
        repo: "deb https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
        state: present

    - name: Install Docker
      apt:
        name:
          - docker-ce
          - docker-ce-cli
          - containerd.io
          - docker-buildx-plugin
          - docker-compose-plugin
        state: present

    - name: Start and enable Docker
      systemd:
        name: docker
        state: started
        enabled: yes

    - name: Deploy containers
      community.docker.docker_container:
        name: "{{ item.name }}"
        image: "{{ item.image }}"
        ports: "{{ item.ports | default(omit) }}"
        volumes: "{{ item.volumes | default(omit) }}"
        env: "{{ item.environment | default(omit) }}"
        restart_policy: unless-stopped
      loop: "{{ containers }}"
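
Quick usage from the control node, assuming the inventory defines the docker_hosts group used above (inventory path and verification commands are illustrative):

bash
# Run the playbook, then spot-check the containers on every host
ansible-playbook -i inventory docker-stack.yml
ansible docker_hosts -i inventory -b -m command -a "docker ps"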
ansible tailscale-setup.yml

Tailscale Mesh VPN Setup

Install and configure Tailscale across all nodes with subnet routing

yaml
---
# Ansible Playbook: Tailscale Mesh VPN
# Usage: ansible-playbook -i inventory tailscale-setup.yml -e "tailscale_authkey=tskey-xxx"

- name: Install and configure Tailscale
  hosts: all
  become: yes
  vars:
    tailscale_authkey: "{{ lookup('env', 'TAILSCALE_AUTHKEY') }}"
    subnet_routers:
      - host: alpha-centauri
        advertise_routes: "10.42.0.0/24"
      - host: titawin-host
        advertise_routes: "192.168.20.0/24"

  tasks:
    - name: Add Tailscale repository key
      apt_key:
        url: "https://pkgs.tailscale.com/stable/ubuntu/{{ ansible_distribution_release }}.noarmor.gpg"
        state: present

    - name: Add Tailscale repository
      apt_repository:
        repo: "deb https://pkgs.tailscale.com/stable/ubuntu {{ ansible_distribution_release }} main"
        state: present

    - name: Install Tailscale
      apt:
        name: tailscale
        state: present
        update_cache: yes

    - name: Enable IP forwarding (for subnet routers)
      sysctl:
        name: "{{ item }}"
        value: "1"
        sysctl_set: yes
        reload: yes
      loop:
        - net.ipv4.ip_forward
        - net.ipv6.conf.all.forwarding
      when: inventory_hostname in (subnet_routers | map(attribute='host'))

    - name: Start Tailscale service
      systemd:
        name: tailscaled
        state: started
        enabled: yes

    - name: Authenticate with Tailscale
      command: >
        tailscale up
        --authkey={{ tailscale_authkey }}
        --ssh
        --accept-routes
        {% if inventory_hostname in (subnet_routers | map(attribute='host')) %}
        --advertise-routes={{ (subnet_routers | selectattr('host', 'eq', inventory_hostname) | first).advertise_routes }}
        {% endif %}
      register: tailscale_up
      changed_when: "'Success' in tailscale_up.stdout"

    - name: Get Tailscale IP
      command: tailscale ip -4
      register: ts_ip
      changed_when: false

    - name: Display Tailscale IP
      debug:
        msg: "{{ inventory_hostname }} Tailscale IP: {{ ts_ip.stdout_lines[0] }}"
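
A minimal run, assuming the auth key is supplied via the environment as in the vars above. Note that advertised subnet routes typically still have to be approved in the Tailscale admin console (unless auto-approvers are configured) before other nodes can use them:

bash
# Pass the pre-auth key through the environment (or -e tailscale_authkey=tskey-xxx)
export TAILSCALE_AUTHKEY="tskey-xxx"
ansible-playbook -i inventory tailscale-setup.yml

# Confirm every node joined the tailnet
ansible all -i inventory -b -m command -a "tailscale status"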
ansible monitoring-stack.yml

Monitoring Stack Deployment

Deploy Glances, Prometheus node exporter, and configure metrics collection

yaml
---
# Ansible Playbook: Monitoring Stack
# Deploys Glances and Prometheus exporters to all hosts

- name: Deploy monitoring agents
  hosts: all
  become: yes
  vars:
    glances_port: 61208
    node_exporter_port: 9100
    glances_version: "4.0.0"

  tasks:
    - name: Install Python dependencies
      apt:
        name:
          - python3-pip
          - python3-docker
        state: present

    - name: Install Glances via pip
      pip:
        name: "glances[all]=={{ glances_version }}"
        state: present

    - name: Create Glances systemd service
      copy:
        dest: /etc/systemd/system/glances.service
        content: |
          [Unit]
          Description=Glances System Monitor
          After=network.target

          [Service]
          ExecStart=/usr/local/bin/glances -w -p {{ glances_port }}
          Restart=on-failure
          RestartSec=10

          [Install]
          WantedBy=multi-user.target
      notify: Reload systemd

    - name: Download Prometheus Node Exporter
      get_url:
        url: "https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz"
        dest: /tmp/node_exporter.tar.gz

    - name: Extract Node Exporter
      unarchive:
        src: /tmp/node_exporter.tar.gz
        dest: /usr/local/bin
        remote_src: yes
        extra_opts: [--strip-components=1]
        creates: /usr/local/bin/node_exporter

    - name: Create Node Exporter systemd service
      copy:
        dest: /etc/systemd/system/node_exporter.service
        content: |
          [Unit]
          Description=Prometheus Node Exporter
          After=network.target

          [Service]
          ExecStart=/usr/local/bin/node_exporter --web.listen-address=:{{ node_exporter_port }}
          Restart=on-failure

          [Install]
          WantedBy=multi-user.target
      notify: Reload systemd

    - name: Start monitoring services
      systemd:
        name: "{{ item }}"
        state: started
        enabled: yes
      loop:
        - glances
        - node_exporter

  handlers:
    - name: Reload systemd
      systemd:
        daemon_reload: yes
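
A quick smoke test once the play finishes; replace <host> with any inventory host. Both ports come from the playbook vars, and the node_exporter endpoints can then be added to an existing Prometheus scrape config:

bash
# Node Exporter metrics endpoint
curl -s http://<host>:9100/metrics | head -n 5

# Glances web UI
curl -sI http://<host>:61208/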
terraform proxmox-vm/main.tf

Proxmox VM Provisioning

Terraform module to create VMs on Proxmox with cloud-init

hcl
# Terraform: Proxmox VM Module
# Creates VMs with cloud-init configuration

terraform {
  required_providers {
    proxmox = {
      source  = "Telmate/proxmox"
      version = "~> 2.9"
    }
  }
}

variable "proxmox_host" {
  description = "Proxmox host IP"
  default     = "10.42.0.201"
}

variable "vm_name" {
  description = "Name of the VM"
  type        = string
}

variable "target_node" {
  description = "Proxmox node to deploy on"
  default     = "icarus"
}

variable "cores" {
  description = "Number of CPU cores"
  default     = 4
}

variable "memory" {
  description = "RAM in MB"
  default     = 4096
}

variable "disk_size" {
  description = "Boot disk size"
  default     = "32G"
}

variable "ip_address" {
  description = "Static IP address"
  type        = string
}

variable "gateway" {
  description = "Network gateway"
  default     = "10.42.0.1"
}

variable "ssh_keys" {
  description = "SSH public keys for cloud-init"
  type        = string
}

resource "proxmox_vm_qemu" "vm" {
  name        = var.vm_name
  target_node = var.target_node
  clone       = "ubuntu-cloud-template"

  cores   = var.cores
  sockets = 1
  memory  = var.memory

  agent = 1  # Enable QEMU guest agent

  disk {
    storage = "local-zfs"
    size    = var.disk_size
    type    = "scsi"
  }

  network {
    model  = "virtio"
    bridge = "vmbr0"
  }

  # Cloud-init configuration
  os_type    = "cloud-init"
  ipconfig0  = "ip=${var.ip_address}/24,gw=${var.gateway}"
  ciuser     = "commander"
  sshkeys    = var.ssh_keys

  lifecycle {
    ignore_changes = [
      network,
    ]
  }

  tags = "terraform,${var.vm_name}"
}

output "vm_ip" {
  value = var.ip_address
}

output "vm_id" {
  value = proxmox_vm_qemu.vm.vmid
}
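
A sketch of applying this module standalone; the values are examples, and the Proxmox provider credentials are assumed to be configured in the root module or via environment variables:

bash
terraform init
terraform apply \
  -var 'vm_name=test-vm' \
  -var 'ip_address=10.42.0.50' \
  -var "ssh_keys=$(cat ~/.ssh/id_ed25519.pub)"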
terraform cloudflare-tunnel/main.tf

Cloudflare Tunnel Configuration

Terraform module for Cloudflare Tunnel and DNS records

hcl
# Terraform: Cloudflare Tunnel
# Manages tunnel configuration and DNS records

terraform {
  required_providers {
    cloudflare = {
      source  = "cloudflare/cloudflare"
      version = "~> 4.0"
    }
  }
}

variable "cloudflare_account_id" {
  description = "Cloudflare account ID"
  type        = string
  sensitive   = true
}

variable "cloudflare_zone_id" {
  description = "Cloudflare zone ID for your domain"
  type        = string
}

variable "domain" {
  description = "Base domain name"
  default     = "argobox.com"
}

variable "tunnel_secret" {
  description = "Tunnel secret (base64)"
  type        = string
  sensitive   = true
}

variable "services" {
  description = "Services to expose through tunnel"
  type = list(object({
    subdomain = string
    service   = string
    port      = number
  }))
  default = [
    { subdomain = "git", service = "localhost", port = 3000 },
    { subdomain = "ai", service = "localhost", port = 30000 },
    { subdomain = "vault", service = "localhost", port = 31745 },
  ]
}

# Create the tunnel
resource "cloudflare_tunnel" "homelab" {
  account_id = var.cloudflare_account_id
  name       = "homelab-tunnel"
  secret     = var.tunnel_secret
}

# Configure tunnel routes
resource "cloudflare_tunnel_config" "homelab" {
  account_id = var.cloudflare_account_id
  tunnel_id  = cloudflare_tunnel.homelab.id

  config {
    dynamic "ingress_rule" {
      for_each = var.services
      content {
        hostname = "${ingress_rule.value.subdomain}.${var.domain}"
        service  = "http://${ingress_rule.value.service}:${ingress_rule.value.port}"
      }
    }

    # Catch-all rule (required)
    ingress_rule {
      service = "http_status:404"
    }
  }
}

# Create DNS records pointing to tunnel
resource "cloudflare_record" "tunnel_dns" {
  for_each = { for s in var.services : s.subdomain => s }

  zone_id = var.cloudflare_zone_id
  name    = each.value.subdomain
  value   = "${cloudflare_tunnel.homelab.id}.cfargotunnel.com"
  type    = "CNAME"
  proxied = true
}

output "tunnel_id" {
  value = cloudflare_tunnel.homelab.id
}

output "tunnel_token" {
  value     = cloudflare_tunnel.homelab.tunnel_token
  sensitive = true
}
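
Rough workflow for applying this and starting the connector; it assumes the Cloudflare provider is authenticated via CLOUDFLARE_API_TOKEN, the remaining variables are supplied however you prefer, and cloudflared is installed on the host that will terminate the tunnel:

bash
export CLOUDFLARE_API_TOKEN="..."
export TF_VAR_tunnel_secret="$(openssl rand -base64 32)"
terraform apply

# Run the connector with the token generated by Terraform
cloudflared tunnel run --token "$(terraform output -raw tunnel_token)"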
shell backup.sh

Automated Backup Script

Rsync-based backup script with rotation and remote sync

bash
#!/bin/bash
# Automated Backup Script with Rotation
# Supports local and remote (rsync over SSH) destinations

set -euo pipefail

# Configuration
BACKUP_NAME="homelab-backup"
SOURCE_DIRS=(
    "/etc"
    "/home/commander"
    "/opt/docker"
    "/var/lib/docker/volumes"
)
LOCAL_DEST="/mnt/backups/${BACKUP_NAME}"
REMOTE_HOST="spica-silo"  # Synology NAS
REMOTE_DEST="/volume1/backups/${HOSTNAME}"
RETENTION_DAYS=30
LOG_FILE="/var/log/backup.log"

# Excludes
EXCLUDES=(
    "*.tmp"
    "*.cache"
    "node_modules"
    ".git"
    "__pycache__"
    "*.log"
)

# Build exclude arguments as an array (safe quoting, no eval needed)
EXCLUDE_ARGS=()
for pattern in "${EXCLUDES[@]}"; do
    EXCLUDE_ARGS+=(--exclude="${pattern}")
done

log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

# Create timestamped backup directory
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="${LOCAL_DEST}/${TIMESTAMP}"

log "Starting backup to ${BACKUP_DIR}"

# Create backup directory
mkdir -p "${BACKUP_DIR}"

# Backup each source directory
for src in "${SOURCE_DIRS[@]}"; do
    if [[ -d "$src" ]]; then
        dest_name=$(echo "$src" | tr '/' '_' | sed 's/^_//')
        log "Backing up $src -> ${BACKUP_DIR}/${dest_name}"

        rsync -avz --delete "${EXCLUDE_ARGS[@]}" \
            "$src/" "${BACKUP_DIR}/${dest_name}/" 2>&1 | tee -a "$LOG_FILE"
    else
        log "WARNING: Source $src does not exist, skipping"
    fi
done

# Create latest symlink
ln -sfn "${BACKUP_DIR}" "${LOCAL_DEST}/latest"

# Sync to remote if available
if ping -c 1 "${REMOTE_HOST}" &> /dev/null; then
    log "Syncing to remote: ${REMOTE_HOST}:${REMOTE_DEST}"
    rsync -avz --delete "${BACKUP_DIR}/" "${REMOTE_HOST}:${REMOTE_DEST}/${TIMESTAMP}/" 2>&1 | tee -a "$LOG_FILE"
    ssh "${REMOTE_HOST}" "ln -sfn ${REMOTE_DEST}/${TIMESTAMP} ${REMOTE_DEST}/latest"
else
    log "WARNING: Remote host ${REMOTE_HOST} unreachable, skipping remote sync"
fi

# Cleanup old backups
log "Cleaning up backups older than ${RETENTION_DAYS} days"
find "${LOCAL_DEST}" -maxdepth 1 -type d -mtime +${RETENTION_DAYS} -exec rm -rf {} \;

if ping -c 1 "${REMOTE_HOST}" &> /dev/null; then
    ssh "${REMOTE_HOST}" "find ${REMOTE_DEST} -maxdepth 1 -type d -mtime +${RETENTION_DAYS} -exec rm -rf {} \;"
fi

log "Backup completed successfully"

# Summary
BACKUP_SIZE=$(du -sh "${BACKUP_DIR}" | cut -f1)
log "Backup size: ${BACKUP_SIZE}"
shell docker-cleanup.sh

Docker Cleanup Script

Safely clean up Docker resources with size reporting

bash
#!/bin/bash
# Docker Cleanup Script
# Removes unused images, containers, volumes, and networks

set -euo pipefail

echo "=== Docker Cleanup Script ==="
echo "Started at: $(date)"
echo

# Show current disk usage
echo "Current Docker disk usage:"
docker system df
echo

# Stop and remove exited containers
EXITED=$(docker ps -aq -f status=exited)
if [[ -n "$EXITED" ]]; then
    echo "Removing exited containers..."
    docker rm $EXITED
else
    echo "No exited containers to remove"
fi

# Remove dangling images
DANGLING=$(docker images -q -f dangling=true)
if [[ -n "$DANGLING" ]]; then
    echo "Removing dangling images..."
    docker rmi $DANGLING
else
    echo "No dangling images to remove"
fi

# Remove unused volumes
echo "Removing unused volumes..."
docker volume prune -f

# Remove unused networks
echo "Removing unused networks..."
docker network prune -f

# Optional: Remove all unused images (uncomment if needed)
# echo "Removing all unused images..."
# docker image prune -a -f

# Final cleanup with system prune
echo "Running system prune..."
docker system prune -f

echo
echo "Final Docker disk usage:"
docker system df

echo
echo "Cleanup completed at: $(date)"
openrc openrc-service-template

OpenRC Service Template

Template for creating OpenRC init scripts (Gentoo/Alpine)

bash
#!/sbin/openrc-run
# OpenRC Service Template
# Place in /etc/init.d/ and chmod +x
# Enable with: rc-update add servicename default

name="myservice"
description="My Custom Service"

# Service configuration
command="/usr/local/bin/myservice"
command_args="--config /etc/myservice/config.yaml"
command_user="commander"
command_group="commander"
command_background=true

# PID file location
pidfile="/run/${RC_SVCNAME}.pid"

# Log configuration
output_log="/var/log/${RC_SVCNAME}.log"
error_log="/var/log/${RC_SVCNAME}.err"

# Dependencies
depend() {
    need net
    after firewall
    use dns logger
}

# Pre-start checks
start_pre() {
    checkpath --directory --owner ${command_user}:${command_group} --mode 0755 /var/lib/myservice
    checkpath --file --owner ${command_user}:${command_group} --mode 0640 /etc/myservice/config.yaml
}

# Custom start function (optional)
start() {
    ebegin "Starting ${name}"
    start-stop-daemon --start \
        --exec ${command} \
        --user ${command_user} \
        --group ${command_group} \
        --background \
        --make-pidfile \
        --pidfile ${pidfile} \
        --stdout ${output_log} \
        --stderr ${error_log} \
        -- ${command_args}
    eend $?
}

# Custom stop function (optional)
stop() {
    ebegin "Stopping ${name}"
    start-stop-daemon --stop \
        --exec ${command} \
        --pidfile ${pidfile}
    eend $?
}

# Status check
status() {
    if [ -f "${pidfile}" ]; then
        if kill -0 $(cat ${pidfile}) 2>/dev/null; then
            einfo "${name} is running (PID: $(cat ${pidfile}))"
            return 0
        fi
    fi
    einfo "${name} is not running"
    return 3
}
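
Typical lifecycle on an Alpine or Gentoo host, assuming the script was saved as /etc/init.d/myservice (the filename becomes RC_SVCNAME):

bash
chmod +x /etc/init.d/myservice
rc-update add myservice default
rc-service myservice start
rc-service myservice status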
systemd backup.timer

Systemd Timer Template

Systemd service and timer for scheduled tasks

ini
# Systemd Timer: backup.timer
# Place in /etc/systemd/system/
# Enable with: systemctl enable --now backup.timer

# === backup.service ===
# [Unit]
# Description=Automated Backup Service
# After=network-online.target
# Wants=network-online.target
#
# [Service]
# Type=oneshot
# ExecStart=/usr/local/bin/backup.sh
# User=root
# StandardOutput=journal
# StandardError=journal
#
# [Install]
# WantedBy=multi-user.target

# === backup.timer ===
[Unit]
Description=Run backup daily at 3 AM

[Timer]
# Run at 3:00 AM every day
OnCalendar=*-*-* 03:00:00

# Add randomized delay up to 15 minutes
RandomizedDelaySec=900

# Run immediately if we missed the last scheduled time
Persistent=true

# Also trigger once, 5 minutes after boot
OnBootSec=5min

[Install]
WantedBy=timers.target

# === Useful timer expressions ===
# OnCalendar=hourly           # Every hour
# OnCalendar=daily            # Every day at midnight
# OnCalendar=weekly           # Every Monday at midnight
# OnCalendar=*-*-* 04:00:00   # Every day at 4 AM
# OnCalendar=Mon *-*-* 02:00  # Every Monday at 2 AM
# OnCalendar=*-*-01 00:00:00  # First of every month

# === Commands ===
# systemctl list-timers              # List all timers
# systemctl status backup.timer      # Check timer status
# systemctl start backup.service     # Run manually
# journalctl -u backup.service       # View logs
shell zfs-snapshot.sh

ZFS Snapshot Management

Automated ZFS snapshots with retention policy

bash
#!/bin/bash
# ZFS Snapshot Management Script
# Creates snapshots with automatic rotation

set -euo pipefail

# Configuration
POOL="tank-storage"
SNAPSHOT_PREFIX="auto"
HOURLY_KEEP=24
DAILY_KEEP=30
WEEKLY_KEEP=12

log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}

# Create snapshot
create_snapshot() {
    local snap_type="$1"
    local timestamp=$(date +%Y%m%d_%H%M%S)
    local snap_name="${POOL}@${SNAPSHOT_PREFIX}_${snap_type}_${timestamp}"

    log "Creating snapshot: ${snap_name}"
    zfs snapshot -r "${snap_name}"
}

# List snapshots by type
list_snapshots() {
    local snap_type="$1"
    zfs list -t snapshot -o name -s creation | grep "${POOL}@${SNAPSHOT_PREFIX}_${snap_type}_" || true
}

# Delete old snapshots
cleanup_snapshots() {
    local snap_type="$1"
    local keep="$2"

    local snapshots=($(list_snapshots "${snap_type}"))
    local count=${#snapshots[@]}

    if (( count > keep )); then
        local to_delete=$((count - keep))
        log "Cleaning up ${to_delete} old ${snap_type} snapshots"

        for ((i=0; i<to_delete; i++)); do
            log "Deleting: ${snapshots[i]}"
            zfs destroy -r "${snapshots[i]}"
        done
    else
        log "No ${snap_type} snapshots to clean (have ${count}, keep ${keep})"
    fi
}

# Main logic based on argument
case "${1:-hourly}" in
    hourly)
        create_snapshot "hourly"
        cleanup_snapshots "hourly" $HOURLY_KEEP
        ;;
    daily)
        create_snapshot "daily"
        cleanup_snapshots "daily" $DAILY_KEEP
        ;;
    weekly)
        create_snapshot "weekly"
        cleanup_snapshots "weekly" $WEEKLY_KEEP
        ;;
    list)
        echo "=== Hourly Snapshots ==="
        list_snapshots "hourly"
        echo
        echo "=== Daily Snapshots ==="
        list_snapshots "daily"
        echo
        echo "=== Weekly Snapshots ==="
        list_snapshots "weekly"
        ;;
    status)
        echo "=== ZFS Pool Status ==="
        zpool status $POOL
        echo
        echo "=== Snapshot Counts ==="
        echo "Hourly: $(list_snapshots hourly | wc -l) / $HOURLY_KEEP"
        echo "Daily:  $(list_snapshots daily | wc -l) / $DAILY_KEEP"
        echo "Weekly: $(list_snapshots weekly | wc -l) / $WEEKLY_KEEP"
        ;;
    *)
        echo "Usage: $0 {hourly|daily|weekly|list|status}"
        exit 1
        ;;
esac

log "Done"