#!/usr/bin/env bash
# shellcheck shell=bash
# =============================================================================
# llmdeal.me consortium - machine onboarding - v0.1
#
# Bootstraps a community GPU/server into the llmdeal consortium routing
# pool. Walks the operator through 8 steps:
#
#   1. Prereqs       - curl, jq, systemctl, sudo, lspci, optional nvidia-smi
#   2. Identity      - validate consortium token, register with control plane
#   3. Inference     - detect local vLLM / TGI / ollama / llama.cpp server
#   4. Models        - pick which models to expose + map to llmdeal aliases
#   5. Tunnel        - quick cloudflared (recommended) | named cloudflared |
#                      reverse SSH | manual
#   6. Systemd       - install heartbeat unit, enable + start
#   7. Smoke test    - probe local server, then phone home through tunnel
#   8. Summary       - print machine id, models, tunnel URL, monitoring URL
#
# Install:
#   curl -sSf https://llmdeal.me/setup/consortium.sh -o consortium-setup.sh
#   chmod +x consortium-setup.sh
#   sudo ./consortium-setup.sh
#
# Re-runs are safe: the script detects prior config and offers re-configure
# or skip.
#
# License: MIT (consortium-side runtime); the llmdeal control plane itself
# remains proprietary.
# =============================================================================

# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# -----------------------------------------------------------------------------
# Constants
# -----------------------------------------------------------------------------
readonly SCRIPT_VERSION="0.1.0"
# The control-plane URL may be overridden from the environment.
readonly CONTROL_PLANE_BASE="${CONTROL_PLANE_BASE:-https://llmdeal.me}"
readonly CONFIG_DIR="/etc/llmdeal"
readonly CONFIG_FILE="${CONFIG_DIR}/consortium.env"
readonly SYSTEMD_UNIT="/etc/systemd/system/llmdeal-consortium.service"
readonly HEARTBEAT_SCRIPT="/usr/local/bin/llmdeal-consortium-heartbeat.sh"
readonly TOKEN_REGEX='^cstm_[a-f0-9]{32}$'
readonly REQUIRED_BINS=(curl jq systemctl sudo lspci)

# Deliberately NOT readonly: _ensure_log() resets LOG_FILE to "" when the log
# file cannot be created (e.g. a non-root --dry-run). Assigning to a readonly
# variable would be an error there and abort the script.
LOG_FILE="/var/log/llmdeal-consortium-setup.log"

# -----------------------------------------------------------------------------
# CLI flag state
# -----------------------------------------------------------------------------
# Boolean switches (0 = off, 1 = on); parse_args flips them.
FLAG_DRY_RUN=0          # --dry-run
FLAG_QUIET=0            # --quiet
FLAG_RECONFIGURE=0      # --reconfigure
FLAG_NON_INTERACTIVE=0  # --non-interactive
FLAG_INSECURE_PIPE=0    # --insecure-pipe; set when invoked via `curl | bash` shortcut
# Value of --config <path>; empty means "not given".
FLAG_CONFIG_PATH=""

# -----------------------------------------------------------------------------
# Colors (only if stdout is a TTY)
# -----------------------------------------------------------------------------
# Start with colors disabled, then switch them on only when stdout is a
# terminal and NO_COLOR is unset/empty.
C_RESET=""
C_GREEN=""
C_BOLD=""
C_DIM=""
C_RED=""
C_YELLOW=""
C_BLUE=""
if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
  C_RESET=$'\033[0m'
  C_BOLD=$'\033[1m'
  C_DIM=$'\033[2m'
  C_RED=$'\033[31m'
  C_GREEN=$'\033[32m'    # accent
  C_YELLOW=$'\033[33m'
  C_BLUE=$'\033[34m'
fi

# -----------------------------------------------------------------------------
# Logging
# -----------------------------------------------------------------------------
# Make sure the log file exists and is not world-readable.
# Globals: LOG_FILE (read; cleared to "" when the file cannot be created,
#          which makes log() fall back to stdout only).
_ensure_log() {
  if touch "$LOG_FILE" 2>/dev/null; then
    # Tightening permissions is best-effort; a failure here is not fatal.
    chmod 0640 "$LOG_FILE" 2>/dev/null || true
    return 0
  fi
  # Could not create the file (typically: not root) - keep going without it.
  LOG_FILE=""
}

# Write one message to the log file (with a UTC timestamp) and to stdout.
# Globals:   LOG_FILE (read; empty disables file logging),
#            FLAG_QUIET (read; non-zero disables the stdout copy)
# Arguments: $* - message text, joined into a single line
log() {
  local stamp text="$*"
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  # File logging is best-effort: an unwritable log must not stop the setup.
  if [[ "$LOG_FILE" ]]; then
    printf '[%s] %s\n' "$stamp" "$text" >>"$LOG_FILE" 2>/dev/null || true
  fi

  if (( FLAG_QUIET != 0 )); then
    return 0
  fi
  printf '%s\n' "$text"
}

# Level-tagged wrappers around log(). warn/err send the console copy to
# stderr; fatal logs an error and exits with status 1.
info()  { log "${C_DIM}INFO ${C_RESET} $*"; }
ok()    { log "${C_GREEN}OK   ${C_RESET} $*"; }
warn()  { log "${C_YELLOW}WARN ${C_RESET} $*" >&2; }
# --quiet is documented as "errors only", so errors must bypass log()'s quiet
# check. The FLAG_QUIET=0 prefix applies only for the duration of this one
# log() call (bash scopes pre-command assignments to the function call).
err()   { FLAG_QUIET=0 log "${C_RED}ERR  ${C_RESET} $*" >&2; }
fatal() { err "$*"; exit 1; }

# Print the welcome banner with the log and config locations.
# Globals: FLAG_QUIET (read; non-zero suppresses the banner), SCRIPT_VERSION,
#          LOG_FILE, FLAG_CONFIG_PATH, CONFIG_FILE, C_* (all read)
banner() {
  if (( FLAG_QUIET == 1 )); then
    return 0
  fi
  # One argument per output line; '%s\n' is re-applied to each of them.
  printf '%s\n' \
    "${C_GREEN}${C_BOLD}" \
    "==============================================================================" \
    "  llmdeal.me consortium - machine onboarding - v${SCRIPT_VERSION}" \
    "==============================================================================" \
    "${C_RESET}${C_DIM}" \
    '  This wizard takes you from "machine registered" to "serving inference' \
    '  to llmdeal customers" in about 10 minutes.' \
    "" \
    "  Logs:   ${LOG_FILE:-<stdout only>}" \
    "  Config: ${FLAG_CONFIG_PATH:-$CONFIG_FILE}" \
    "${C_RESET}"
}

# Print a step heading followed by a dim rule.
# Globals:   FLAG_QUIET (read; non-zero suppresses output), C_* (read)
# Arguments: $* - heading text
section() {
  if (( FLAG_QUIET == 1 )); then
    return 0
  fi
  local rule="----------------------------------------------------------------------"
  printf '\n%s%s>> %s%s\n%s%s%s\n' \
    "$C_GREEN" "$C_BOLD" "$*" "$C_RESET" \
    "$C_DIM" "$rule" "$C_RESET"
}

# -----------------------------------------------------------------------------
# Dry-run wrapper
# -----------------------------------------------------------------------------
# Execute a command, or only announce it when --dry-run is active.
# Globals:   FLAG_DRY_RUN (read)
# Arguments: $@ - command and its arguments
# Returns:   0 in dry-run mode, otherwise the command's exit status
run() {
  if (( FLAG_DRY_RUN != 1 )); then
    log "${C_DIM}exec${C_RESET} $*"
    "$@"
    return
  fi
  # Dry-run: show what would have been executed and report success.
  printf '%s[dry-run]%s %s\n' "$C_YELLOW" "$C_RESET" "$*"
}

# -----------------------------------------------------------------------------
# Usage
# -----------------------------------------------------------------------------
# Print the help text to stdout.
# Globals: SCRIPT_VERSION, CONFIG_FILE, CONTROL_PLANE_BASE, SYSTEMD_UNIT,
#          HEARTBEAT_SCRIPT, LOG_FILE, C_* (all read)
usage() {
  # ${0##*/} instead of basename: no fork, and safe when $0 starts with '-'.
  cat <<EOF
${C_BOLD}llmdeal.me consortium setup v${SCRIPT_VERSION}${C_RESET}

Usage:
  sudo ${0##*/} [options]

Options:
  --non-interactive      Read all config from --config; no prompts.
  --config <path>        Alternate config file (default: ${CONFIG_FILE})
  --dry-run              Print every action, do not mutate the system.
  --quiet                Suppress banner + progress; errors only.
  --reconfigure          Re-run setup, overwriting any existing config.
  --version              Print the script version and exit.
  --help, -h             Show this help and exit.

Environment overrides:
  CONSORTIUM_TOKEN       Pre-supply the consortium token (skips prompt).
  CONTROL_PLANE_BASE     Override the llmdeal control-plane URL.
  NO_COLOR=1             Force-disable ANSI colors.

One-liner (review the script first; use --quiet only for re-runs):
  curl -sSf ${CONTROL_PLANE_BASE}/setup/consortium.sh | sudo bash -s -- --help

Files written:
  ${CONFIG_FILE}                          (mode 0600, owner root)
  ${SYSTEMD_UNIT}
  ${HEARTBEAT_SCRIPT}
  ${LOG_FILE}

After install: ${C_DIM}systemctl status llmdeal-consortium${C_RESET}
EOF
}

# -----------------------------------------------------------------------------
# Parse args
# -----------------------------------------------------------------------------
# Translate command-line options into the FLAG_* globals.
# Globals:   FLAG_NON_INTERACTIVE, FLAG_DRY_RUN, FLAG_QUIET, FLAG_RECONFIGURE,
#            FLAG_INSECURE_PIPE, FLAG_CONFIG_PATH (written);
#            CONFIG_FILE, SCRIPT_VERSION (read)
# Arguments: $@ - the script's command-line arguments
# Exits:     0 after --help/--version, 2 on an unknown flag; a missing
#            --config value is reported through fatal.
parse_args() {
  local flag
  while (( $# > 0 )); do
    flag="$1"
    shift
    case "$flag" in
      --non-interactive)
        FLAG_NON_INTERACTIVE=1
        ;;
      --dry-run)
        FLAG_DRY_RUN=1
        ;;
      --quiet)
        FLAG_QUIET=1
        ;;
      --reconfigure)
        FLAG_RECONFIGURE=1
        ;;
      --insecure-pipe)
        FLAG_INSECURE_PIPE=1
        ;;
      --config)
        # The option's value is the next word; consume it as well.
        if (( $# == 0 )); then
          fatal "--config requires an argument"
        fi
        FLAG_CONFIG_PATH="$1"
        shift
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      --version)
        echo "$SCRIPT_VERSION"
        exit 0
        ;;
      *)
        err "unknown flag: $flag"
        usage >&2
        exit 2
        ;;
    esac
  done

  # Non-interactive runs always read a config; default to the standard path.
  if [[ -z "$FLAG_CONFIG_PATH" ]] && (( FLAG_NON_INTERACTIVE == 1 )); then
    FLAG_CONFIG_PATH="$CONFIG_FILE"
  fi
}

# -----------------------------------------------------------------------------
# Privilege check
# -----------------------------------------------------------------------------
# Abort unless running as root. Dry runs are exempt from the check.
# Globals: FLAG_DRY_RUN (read), EUID (read; falls back to `id -u`)
require_root() {
  (( FLAG_DRY_RUN == 1 )) && return 0
  [[ "${EUID:-$(id -u)}" -eq 0 ]] \
    || fatal "this script writes to /etc and /usr/local; please run with sudo."
}

# -----------------------------------------------------------------------------
# Prompts (suppressed in non-interactive mode)
# -----------------------------------------------------------------------------
# Prompt for a line of text and print the answer on stdout.
# Globals:   FLAG_NON_INTERACTIVE (read), C_BOLD, C_RESET (read)
# Arguments: $1 - prompt text
#            $2 - default value (optional), used when the answer is empty
# Outputs:   the answer; in non-interactive mode the default, without prompting
ask() {
  local question="$1"
  local fallback="${2:-}"
  local answer
  local suffix=""

  if (( FLAG_NON_INTERACTIVE == 1 )); then
    printf '%s\n' "$fallback"
    return 0
  fi

  # Show the default in brackets only when there is one.
  if [[ -n "$fallback" ]]; then
    suffix=" [${fallback}]"
  fi
  # A failed read (e.g. EOF) is treated as an empty answer.
  read -r -p "${C_BOLD}?${C_RESET} ${question}${suffix}: " answer || true
  printf '%s\n' "${answer:-$fallback}"
}

# Ask a yes/no question.
# Globals:   FLAG_NON_INTERACTIVE (read), C_BOLD, C_RESET (read)
# Arguments: $1 - prompt text
#            $2 - default answer, "y" when omitted
# Returns:   0 for yes, 1 for no. In non-interactive mode the default decides.
ask_yn() {
  local question="$1"
  local fallback="${2:-y}"
  local reply
  local hint="y/N"

  # Capitalise whichever choice pressing Enter selects.
  if [[ "$fallback" == [yY] ]]; then
    hint="Y/n"
  fi

  if (( FLAG_NON_INTERACTIVE == 1 )); then
    if [[ "$fallback" == [yY] ]]; then
      return 0
    else
      return 1
    fi
  fi

  read -r -p "${C_BOLD}?${C_RESET} ${question} [${hint}]: " reply || true
  reply="${reply:-$fallback}"
  # Accept "y" or "yes" in any letter case; everything else means no.
  [[ "$reply" =~ ^[yY]([eE][sS])?$ ]]
}

# Prompt for a secret without echoing the typed characters.
# Globals:   FLAG_NON_INTERACTIVE (read), CONSORTIUM_TOKEN (read),
#            C_BOLD, C_RESET (read)
# Arguments: $1 - prompt text
# Outputs:   the secret on stdout. In non-interactive mode this is the value
#            of CONSORTIUM_TOKEN (possibly empty) and nothing is prompted.
ask_secret() {
  local question="$1"
  local secret

  if (( FLAG_NON_INTERACTIVE != 1 )); then
    read -r -s -p "${C_BOLD}?${C_RESET} ${question}: " secret || true
    # -s swallowed the user's Enter, so finish the prompt line on stderr.
    printf '\n' >&2
    printf '%s\n' "$secret"
    return
  fi

  # Non-interactive uses env / config; never echo to ps args.
  printf '%s\n' "${CONSORTIUM_TOKEN:-}"
}

# -----------------------------------------------------------------------------
# OS detect + package install helper
# -----------------------------------------------------------------------------
OS_ID=""        # ID from os-release ("unknown" when the field is missing)
OS_LIKE=""      # ID_LIKE from os-release (may be a space-separated list)
PKG_MGR=""      # package manager binary; empty when none was recognised
PKG_INSTALL=""  # install command prefix; expanded unquoted by install_pkg

# Identify the distribution and choose a package manager.
# Globals:   OS_ID, OS_LIKE, PKG_MGR, PKG_INSTALL (written). Sourcing the
#            os-release file also defines its variables (ID, ID_LIKE, ...) in
#            the current shell.
# Arguments: $1 - os-release file to read (optional, default /etc/os-release)
detect_os() {
  local os_release="${1:-/etc/os-release}"

  OS_ID=""
  OS_LIKE=""
  if [[ -r "$os_release" ]]; then
    # Drop inherited values so a stray ID/ID_LIKE from the environment cannot
    # masquerade as distro data when the file omits one of the fields.
    unset ID ID_LIKE
    # shellcheck disable=SC1090
    source "$os_release"
    OS_ID="${ID:-unknown}"
    OS_LIKE="${ID_LIKE:-}"
  fi

  # Match against "ID:ID_LIKE". Family names are matched anywhere in the
  # string so derivatives that only name their parent in ID_LIKE are caught,
  # e.g. "sles:suse" or "amzn:fedora".
  case "${OS_ID}:${OS_LIKE}" in
    *debian*|*ubuntu*)
      PKG_MGR="apt-get"; PKG_INSTALL="apt-get install -y" ;;
    *fedora*|*rhel*|*centos*|rocky*|alma*)
      PKG_MGR="dnf";     PKG_INSTALL="dnf install -y" ;;
    *arch*|manjaro*)
      PKG_MGR="pacman";  PKG_INSTALL="pacman -S --noconfirm" ;;
    *suse*)
      PKG_MGR="zypper";  PKG_INSTALL="zypper install -y" ;;
    *)
      PKG_MGR="";  PKG_INSTALL="" ;;
  esac
  info "OS: ${OS_ID:-unknown} (pkg manager: ${PKG_MGR:-none-detected})"
}

# Map "logical" prereq name -> distro-specific package name.
# Print the name of the package that provides a required binary.
# Globals:   PKG_MGR (read)
# Arguments: $1 - binary name (e.g. lspci)
# Outputs:   package name on stdout; the binary name itself when no mapping
#            is known or no supported package manager was detected.
pkg_for() {
  local bin="$1"
  case "$PKG_MGR" in
    apt-get|dnf|pacman|zypper)
      case "$bin" in
        lspci)
          printf '%s\n' "pciutils"
          return 0
          ;;
        systemctl)
          # systemctl ships inside the systemd package; asking the package
          # manager for "systemctl" would fail or pull in something else.
          printf '%s\n' "systemd"
          return 0
          ;;
      esac
      ;;
  esac
  printf '%s\n' "$bin"
}

# Offer to install the package that provides a binary.
# Globals:   PKG_INSTALL, PKG_MGR (read)
# Arguments: $1 - binary name
# Returns:   1 when no package manager is known or the operator declines;
#            otherwise the status of the install command (via run).
install_pkg() {
  local bin="$1"
  local pkg
  pkg="$(pkg_for "$bin")"

  if [[ -z "$PKG_INSTALL" ]]; then
    warn "no package manager detected; install '$pkg' manually."
    return 1
  fi

  ask_yn "Install '$pkg' via $PKG_MGR?" "y" || {
    warn "skipped installing $pkg; script may fail later."
    return 1
  }

  # PKG_INSTALL holds a command plus its flags, so it must word-split.
  # shellcheck disable=SC2086
  run $PKG_INSTALL "$pkg"
}

# -----------------------------------------------------------------------------
# Step 1: prereqs
# -----------------------------------------------------------------------------
# Step 1: check required binaries, offer to install missing ones, and record
# the GPU model (best-effort).
# Globals: REQUIRED_BINS (read), GPU_MODEL (written)
step_prereqs() {
  section "Step 1 / 8 - Prerequisites"
  detect_os

  local bin
  local -a missing=()
  for bin in "${REQUIRED_BINS[@]}"; do
    if command -v "$bin" >/dev/null 2>&1; then
      info "found: $bin"
    else
      warn "missing: $bin"
      missing+=("$bin")
    fi
  done

  if (( ${#missing[@]} > 0 )); then
    for bin in "${missing[@]}"; do
      install_pkg "$bin" || warn "$bin still missing"
    done
  fi

  # GPU detection (best-effort). Both probes end in "|| true": with
  # `set -e -o pipefail` a failing nvidia-smi/lspci (common in containers and
  # VMs) would otherwise abort the whole script inside the assignment. awk
  # reads all of its input, so the producer is never cut off by a closed pipe.
  local gpu=""
  GPU_MODEL="unknown"
  if command -v nvidia-smi >/dev/null 2>&1; then
    gpu="$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null \
      | awk 'NR == 1 { print }' || true)"
    if [[ -n "$gpu" ]]; then
      ok "NVIDIA GPU: $gpu"
      GPU_MODEL="$gpu"
    else
      warn "nvidia-smi available but no GPU rows; running headless?"
      GPU_MODEL="nvidia-unknown"
    fi
  elif command -v lspci >/dev/null 2>&1; then
    # Fallback: third colon-separated field of the first VGA/3D/Display line,
    # with surrounding spaces trimmed.
    gpu="$(lspci 2>/dev/null \
      | awk -F: '/VGA|3D|Display/ && !seen++ { sub(/^ +/, "", $3); sub(/ +$/, "", $3); print $3 }' \
      || true)"
    GPU_MODEL="${gpu:-unknown}"
    info "GPU via lspci: $GPU_MODEL"
  fi
}

# -----------------------------------------------------------------------------
# Step 2: identity (token validate + remote verify)
# -----------------------------------------------------------------------------
TOKEN=""                   # consortium token in use for this run
MACHINE_ID=""              # id assigned by the control plane on verify
EXPECTED_MODELS_JSON="[]"  # JSON array of model names the control plane expects

# Step 2: obtain the consortium token, validate its format, and register the
# machine with the control plane.
# Globals: TOKEN, MACHINE_ID, EXPECTED_MODELS_JSON (written); CONFIG_FILE,
#          FLAG_RECONFIGURE, FLAG_NON_INTERACTIVE, FLAG_CONFIG_PATH,
#          FLAG_DRY_RUN, CONSORTIUM_TOKEN, TOKEN_REGEX, GPU_MODEL,
#          CONTROL_PLANE_BASE (read)
# Exits:   via fatal on a malformed token, an unreachable control plane, a
#          non-200 response, or a response without machine_id.
step_identity() {
  section "Step 2 / 8 - Consortium identity"

  # Token sources, in order: existing config (if the operator agrees),
  # CONSORTIUM_TOKEN from the environment, the --config file, the prompt.
  # NOTE(review): sourcing a config that assigns a readonly name (e.g.
  # CONTROL_PLANE_BASE) would abort the script - confirm what the config
  # writer emits.
  if [[ -f "$CONFIG_FILE" ]] && (( FLAG_RECONFIGURE == 0 )); then
    info "existing config detected at $CONFIG_FILE"
    if ask_yn "Re-use the existing token? (No = paste a new one)" "y"; then
      # shellcheck disable=SC1090
      source "$CONFIG_FILE"
      TOKEN="${CONSORTIUM_TOKEN:-}"
    fi
  fi

  if [[ -z "$TOKEN" ]] && [[ -n "${CONSORTIUM_TOKEN:-}" ]]; then
    TOKEN="$CONSORTIUM_TOKEN"
    info "using token from CONSORTIUM_TOKEN env"
  fi

  if [[ -z "$TOKEN" ]] && (( FLAG_NON_INTERACTIVE == 1 )); then
    if [[ -n "$FLAG_CONFIG_PATH" ]] && [[ -f "$FLAG_CONFIG_PATH" ]]; then
      # shellcheck disable=SC1090
      source "$FLAG_CONFIG_PATH"
      TOKEN="${CONSORTIUM_TOKEN:-}"
    fi
  fi

  if [[ -z "$TOKEN" ]]; then
    cat <<EOF

  The operator should have DMed you a token shaped like:
      cstm_<32-hex-chars>
  e.g. cstm_a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6
EOF
    TOKEN="$(ask_secret "Paste your consortium token")"
  fi

  # Local format validation.
  if ! [[ "$TOKEN" =~ $TOKEN_REGEX ]]; then
    fatal "token format invalid (expected cstm_<32-hex-chars>)"
  fi
  ok "token format valid"

  # Phone home.
  local hostname kernel body resp
  local http_code=""
  hostname="$(hostname -f 2>/dev/null || hostname)"
  kernel="$(uname -srm)"
  body=$(jq -n \
    --arg token "$TOKEN" \
    --arg hostname "$hostname" \
    --arg kernel "$kernel" \
    --arg gpu_model "${GPU_MODEL:-unknown}" \
    '{token:$token, hostname:$hostname, kernel:$kernel, gpu_model:$gpu_model}')

  if (( FLAG_DRY_RUN == 1 )); then
    info "[dry-run] would POST ${CONTROL_PLANE_BASE}/api/consortium/verify"
    MACHINE_ID="cmach_DRYRUN0000000000000000000"
    EXPECTED_MODELS_JSON="[]"
    return 0
  fi

  # The body (which contains the token) is fed through stdin so it never
  # shows up in curl's argv, where any local user could read it with ps.
  resp="$(printf '%s' "$body" | curl -sS -m 20 -X POST \
    -H 'Content-Type: application/json' \
    -w '\n__HTTP_%{http_code}__' \
    --data-binary @- \
    "${CONTROL_PLANE_BASE}/api/consortium/verify" || true)"

  # Split the "__HTTP_<code>__" trailer off the body. Done with a bash regex
  # rather than a grep pipeline: under `set -e -o pipefail` a non-matching
  # grep would kill the script here, before any error could be reported.
  if [[ "$resp" =~ __HTTP_([0-9]+)__$ ]]; then
    http_code="${BASH_REMATCH[1]}"
    resp="${resp%__HTTP_*}"
    resp="${resp%$'\n'}"
  fi

  # No status at all, or curl's 000, means the request never completed.
  if [[ -z "$http_code" || "$http_code" == "000" ]]; then
    fatal "could not reach control plane at ${CONTROL_PLANE_BASE} (network or TLS failure); check connectivity and re-run."
  fi

  if [[ "$http_code" != "200" ]]; then
    err "control-plane rejected token (HTTP ${http_code:-???})"
    err "response: $(printf '%s' "$resp" | head -c 400)"
    fatal "ask the operator to confirm your token is registered."
  fi

  # A 200 with a non-JSON body must end in the fatal below, not in a bare
  # jq failure under set -e.
  MACHINE_ID="$(printf '%s' "$resp" | jq -r '.machine_id // empty' 2>/dev/null || true)"
  EXPECTED_MODELS_JSON="$(printf '%s' "$resp" | jq -c '.expected_models // []' 2>/dev/null || true)"
  EXPECTED_MODELS_JSON="${EXPECTED_MODELS_JSON:-[]}"

  [[ -n "$MACHINE_ID" ]] || fatal "control plane returned no machine_id"
  ok "registered as ${MACHINE_ID}"
  if [[ "$EXPECTED_MODELS_JSON" != "[]" ]]; then
    info "expected models: $(printf '%s' "$EXPECTED_MODELS_JSON" | jq -r 'join(", ")' 2>/dev/null || true)"
  fi
}

# -----------------------------------------------------------------------------
# Step 3: detect inference server
# -----------------------------------------------------------------------------
INFER_KIND=""        # vllm | tgi | ollama | llamacpp | manual
INFER_URL=""         # base URL, e.g. http://127.0.0.1:8000
INFER_MODELS_RAW=""  # newline-separated list of detected model ids

# GET a URL with a 3-second limit and print only the HTTP status code.
# Arguments: $1 - URL to probe
# Outputs:   the status code, or exactly "000" when the request failed
# Returns:   always 0
probe_url() {
  local code
  # curl prints "000" itself via -w when the transfer fails, so the fallback
  # replaces the captured value instead of being appended to it (appending
  # yields "000000").
  code="$(curl -sS -m 3 -o /dev/null -w '%{http_code}' "$1" 2>/dev/null)" || code="000"
  printf '%s\n' "${code:-000}"
}

# Look for an OpenAI-compatible /v1/models endpoint on ports 8000, 8001, 8080.
# Outputs: base URL of the first port that answers 200
# Returns: 0 when found, 1 otherwise
detect_vllm() {
  local port  # keep the loop variable out of the global scope
  for port in 8000 8001 8080; do
    if [[ "$(probe_url "http://127.0.0.1:${port}/v1/models")" == "200" ]]; then
      printf '%s\n' "http://127.0.0.1:${port}"
      return 0
    fi
  done
  return 1
}
# Look for an ollama server by probing /api/tags on port 11434.
# Outputs: the base URL when the probe answers 200
# Returns: 0 when found, 1 otherwise
detect_ollama() {
  local base="http://127.0.0.1:11434"
  [[ "$(probe_url "${base}/api/tags")" == "200" ]] || return 1
  echo "$base"
}
# Look for a TGI server by probing /info on ports 3000, 80, 8080.
# Outputs: base URL of the first port that answers 200
# Returns: 0 when found, 1 otherwise
detect_tgi() {
  local port  # keep the loop variable out of the global scope
  for port in 3000 80 8080; do
    if [[ "$(probe_url "http://127.0.0.1:${port}/info")" == "200" ]]; then
      printf '%s\n' "http://127.0.0.1:${port}"
      return 0
    fi
  done
  return 1
}
# Look for a llama.cpp server by probing /health on ports 8080 and 8000.
# Globals: INFER_KIND (read)
# Outputs: base URL of the first port that answers 200
# Returns: 0 when found, 1 otherwise
detect_llamacpp() {
  local port code  # locals, so neither leaks into the global scope
  for port in 8080 8000; do
    # llama.cpp server health endpoint
    code="$(probe_url "http://127.0.0.1:${port}/health")"
    # NOTE(review): a hit only counts while INFER_KIND is still empty, so
    # nothing is reported once a server kind has been chosen. Confirm this is
    # intended: other servers may also answer 200 on /health, and this guard
    # does not tell them apart.
    if [[ "$code" == "200" ]] && [[ -z "${INFER_KIND:-}" ]]; then
      printf '%s\n' "http://127.0.0.1:${port}"
      return 0
    fi
  done
  return 1
}

# Step 3: find a local inference server, let the operator choose when several
# answer, or accept a manually entered URL; then list its models.
# Globals: INFER_KIND, INFER_URL (written, directly or through pick_server)
# Exits:   via fatal when nothing is detected and no manual URL is given.
step_detect_server() {
  section "Step 3 / 8 - Detect local inference server"

  local -a found=()
  local url entry choice
  local i=1
  if url="$(detect_vllm)"; then     found+=("vllm@$url"); fi
  if url="$(detect_ollama)"; then   found+=("ollama@$url"); fi
  if url="$(detect_tgi)"; then      found+=("tgi@$url"); fi
  if url="$(detect_llamacpp)"; then found+=("llamacpp@$url"); fi

  if (( ${#found[@]} == 0 )); then
    warn "no inference server detected on common ports."
    cat <<EOF

  llmdeal can route to any OpenAI-compatible /v1/chat/completions server.
  Common installs:
    vLLM:    pip install vllm  &&  python -m vllm.entrypoints.openai.api_server \\
                                       --model meta-llama/Llama-3.3-70B-Instruct
    ollama:  curl -fsSL https://ollama.com/install.sh | sh  &&  ollama serve
    TGI:     docker run --gpus all -p 3000:80 ghcr.io/huggingface/text-generation-inference

EOF
    if ask_yn "Specify the inference URL manually?" "n"; then
      INFER_KIND="manual"
      INFER_URL="$(ask "Inference URL (OpenAI-compatible /v1)" "http://127.0.0.1:8000")"
    else
      fatal "install an inference server, then re-run this script."
    fi
  elif (( ${#found[@]} == 1 )); then
    info "detected: ${found[0]}"
    pick_server "${found[0]}"
  else
    info "multiple inference servers detected."
    for entry in "${found[@]}"; do
      printf '  [%d] %s\n' "$i" "$entry"
      i=$((i + 1))
    done
    printf '  [%d] specify URL manually\n' "$i"
    choice="$(ask "Which server to expose to llmdeal?" "1")"
    # 10# forces base 10: an answer such as "08" would otherwise be parsed
    # as an invalid octal number and raise an arithmetic error.
    if [[ "$choice" =~ ^[0-9]+$ ]] \
      && (( 10#$choice >= 1 && 10#$choice <= ${#found[@]} )); then
      pick_server "${found[$((10#$choice - 1))]}"
    else
      INFER_KIND="manual"
      INFER_URL="$(ask "Inference URL" "http://127.0.0.1:8000")"
    fi
  fi

  # Endpoint paths are appended as "${INFER_URL}/..."; drop one trailing
  # slash so a typed "http://host:8000/" does not become "//v1/models".
  INFER_URL="${INFER_URL%/}"

  ok "inference server: ${INFER_KIND} @ ${INFER_URL}"
  detect_models
}

# Split a "kind@url" entry into the INFER_KIND / INFER_URL globals.
# Globals:   INFER_KIND, INFER_URL (written)
# Arguments: $1 - entry such as "vllm@http://127.0.0.1:8000"
pick_server() {
  # Kind is everything before the first "@"; URL is everything after it.
  INFER_KIND="${1%%@*}"
  INFER_URL="${1#*@}"
}

# Ask the selected inference server which models it serves.
# Globals: INFER_KIND, INFER_URL (read); INFER_MODELS_RAW (written: one model
#          id per line, left untouched for an unrecognised INFER_KIND)
detect_models() {
  local endpoint=""
  local filter=""

  # Pick the listing endpoint and the jq filter that extracts the ids.
  case "$INFER_KIND" in
    vllm|manual|llamacpp)
      # llama.cpp /v1/models is also OpenAI-compatible.
      endpoint="/v1/models"
      filter='.data[]?.id // empty'
      ;;
    ollama)
      endpoint="/api/tags"
      filter='.models[]?.name // empty'
      ;;
    tgi)
      # TGI exposes a single model at /info; query model_id field.
      endpoint="/info"
      filter='.model_id // empty'
      ;;
  esac

  if [[ -n "$endpoint" ]]; then
    # Best-effort: any curl/jq failure just yields an empty list.
    INFER_MODELS_RAW="$(curl -sS -m 5 "${INFER_URL}${endpoint}" 2>/dev/null | \
      jq -r "$filter" 2>/dev/null || true)"
  fi

  if [[ -z "$INFER_MODELS_RAW" ]]; then
    warn "couldn't auto-list models; you'll be asked to type them in step 4."
  fi
}

# -----------------------------------------------------------------------------
# Step 4: model selection + alias mapping
# -----------------------------------------------------------------------------
# Parallel arrays filled by step_models: entry N of SELECTED_ALIASES is the
# llmdeal alias for entry N of SELECTED_MODELS.
declare -a SELECTED_MODELS=()   # raw ids from local server
declare -a SELECTED_ALIASES=()  # llmdeal aliases (1:1 with SELECTED_MODELS)

# Derive a default alias from a model id.
# Arguments: $1 - model id, e.g. "meta-llama/Llama-3.3-70B-Instruct"
# Outputs:   the alias without a trailing newline; that example becomes
#            "llama-3-3-70b" (dots and underscores turn into dashes).
suggest_alias() {
  local model_id="$1"
  # Keep only the part after the last "/", lowercase it, then: drop a trailing
  # "-instruct", turn runs of "." / "_" into "-", squeeze repeated dashes and
  # trim dashes from both ends.
  printf '%s' "${model_id##*/}" \
    | tr '[:upper:]' '[:lower:]' \
    | sed -E \
        -e 's/-instruct$//' \
        -e 's/[._]+/-/g' \
        -e 's/-+/-/g' \
        -e 's/^-+//' \
        -e 's/-+$//'
}

# Step 4: choose which local models to expose and give each an llmdeal alias.
# Globals: INFER_MODELS_RAW, FLAG_NON_INTERACTIVE, CONSORTIUM_MODELS (read);
#          SELECTED_MODELS, SELECTED_ALIASES (appended to, in lock-step)
# Exits:   via fatal in non-interactive mode when no models are detected and
#          CONSORTIUM_MODELS is unset.
step_models() {
  section "Step 4 / 8 - Model selection + llmdeal alias mapping"

  # Everything is local, including loop variables, so nothing leaks out.
  local -a models=() typed_models=() nums=() chosen_idx=()
  local line m n j idx picks raw suggested alias
  local typed=""
  local i=1

  if [[ -n "$INFER_MODELS_RAW" ]]; then
    while IFS= read -r line; do
      [[ -z "$line" ]] && continue
      models+=("$line")
    done <<<"$INFER_MODELS_RAW"
  fi

  if (( ${#models[@]} == 0 )); then
    warn "no models auto-detected."
    if (( FLAG_NON_INTERACTIVE == 1 )); then
      if [[ -n "${CONSORTIUM_MODELS:-}" ]]; then
        info "using CONSORTIUM_MODELS from config: $CONSORTIUM_MODELS"
        typed="$CONSORTIUM_MODELS"
      else
        fatal "non-interactive mode + no detected models; set CONSORTIUM_MODELS=a,b,c in config."
      fi
    else
      typed="$(ask "Type a comma-separated model list to expose" "")"
    fi
    IFS=',' read -ra typed_models <<<"$typed"
    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
    # older bash releases.
    for m in ${typed_models[@]+"${typed_models[@]}"}; do
      # Trim surrounding whitespace ("a, b" must yield "b", not " b") and
      # skip empty entries.
      m="${m#"${m%%[![:space:]]*}"}"
      m="${m%"${m##*[![:space:]]}"}"
      if [[ -n "$m" ]]; then
        models+=("$m")
      fi
    done
  fi

  # Nothing to choose from: skip the prompts entirely.
  if (( ${#models[@]} == 0 )); then
    warn "no models selected; consortium can still register but won't serve anything."
    return 0
  fi

  info "detected models on local server:"
  for m in "${models[@]}"; do
    printf '  [%d] %s\n' "$i" "$m"
    i=$((i + 1))
  done

  if (( FLAG_NON_INTERACTIVE == 1 )); then
    picks="all"
  else
    picks="$(ask "Which to expose? (csv of numbers, 'all', or 'none')" "all")"
  fi

  case "$picks" in
    all)
      for ((j = 0; j < ${#models[@]}; j++)); do
        chosen_idx+=("$j")
      done
      ;;
    none)
      ;;
    *)
      IFS=',' read -ra nums <<<"$picks"
      for n in ${nums[@]+"${nums[@]}"}; do
        n="${n//[[:space:]]/}"
        [[ "$n" =~ ^[0-9]+$ ]] || continue
        # Force base 10: "08" would otherwise be rejected as invalid octal.
        n=$((10#$n))
        if (( n >= 1 && n <= ${#models[@]} )); then
          chosen_idx+=("$((n - 1))")
        fi
      done
      ;;
  esac

  if (( ${#chosen_idx[@]} == 0 )); then
    warn "no models selected; consortium can still register but won't serve anything."
    return 0
  fi

  for idx in "${chosen_idx[@]}"; do
    raw="${models[$idx]}"
    suggested="$(suggest_alias "$raw")"
    if (( FLAG_NON_INTERACTIVE == 1 )); then
      alias="$suggested"
    else
      alias="$(ask "Map '$raw' to alias" "$suggested")"
    fi
    # A model without an alias is not exposed.
    [[ -z "$alias" ]] && continue
    SELECTED_MODELS+=("$raw")
    SELECTED_ALIASES+=("$alias")
    ok "  $raw -> $alias"
  done
}

# -----------------------------------------------------------------------------
# Step 5: tunnel setup
# -----------------------------------------------------------------------------
TUNNEL_KIND=""        # quick-cloudflared | cloudflared | autossh | manual
TUNNEL_HOSTNAME=""    # public host/URL of the tunnel, once known
TUNNEL_LOCAL_PORT=""  # local inference port, derived from INFER_URL

# Step 5: work out the local inference port, explain the tunnel options and
# run the chosen setup routine.
# Globals: INFER_URL, FLAG_NON_INTERACTIVE, CONSORTIUM_TUNNEL_KIND (read);
#          TUNNEL_LOCAL_PORT (written)
step_tunnel() {
  section "Step 5 / 8 - Tunnel setup"

  # Extract local port from INFER_URL. Without an explicit port, use the
  # scheme default: 443 for https, otherwise 80.
  local port_re='^https?://[^/]*:([0-9]+)([/?#].*)?$'
  if [[ "$INFER_URL" =~ $port_re ]]; then
    TUNNEL_LOCAL_PORT="${BASH_REMATCH[1]}"
  elif [[ "$INFER_URL" == https://* ]]; then
    TUNNEL_LOCAL_PORT="443"
  else
    TUNNEL_LOCAL_PORT="80"
  fi

  cat <<EOF

  llmdeal needs a publicly addressable URL to reach your inference server.
  Four options:

    [1] Quick Cloudflare tunnel (RECOMMENDED, default)
        Spins up a free *.trycloudflare.com URL with no account, no DNS,
        no config files. The tunnel host gets posted to llmdeal control
        plane automatically. Re-derives a fresh URL on every restart.
    [2] Named Cloudflare tunnel (advanced)
        Requires a Cloudflare account; lets you keep a stable subdomain.
    [3] Reverse SSH to tunnel.llmdeal.me   - simple, one autossh process.
    [4] Manual                             - skip install, you wire it up.

  Security note: options 1 and 2 keep your machine fully behind NAT. Option
  3 opens an outbound SSH session only. Option 4 means YOU are responsible
  for making port ${TUNNEL_LOCAL_PORT} reachable from the public internet.

EOF
  local choice
  if (( FLAG_NON_INTERACTIVE == 1 )); then
    choice="${CONSORTIUM_TUNNEL_KIND:-1}"
  else
    choice="$(ask "Choice" "1")"
  fi

  # Accept the menu number or the option's keyword; anything else is manual.
  case "$choice" in
    1|quick|quickcloudflared) setup_quick_cloudflared ;;
    2|cloudflared)            setup_cloudflared ;;
    3|autossh)                setup_autossh ;;
    *)                        setup_manual_tunnel ;;
  esac
}

# -----------------------------------------------------------------------------
# Quick cloudflared - no-account *.trycloudflare.com tunnel via systemd.
# -----------------------------------------------------------------------------
# Set up an account-less *.trycloudflare.com tunnel as a systemd service and
# capture the public URL that cloudflared reports.
# Globals: PKG_MGR, FLAG_DRY_RUN, FLAG_NON_INTERACTIVE, TUNNEL_LOCAL_PORT
#          (read); TUNNEL_KIND, TUNNEL_HOSTNAME (written)
# Falls back to setup_manual_tunnel when cloudflared is not available.
setup_quick_cloudflared() {
  TUNNEL_KIND="quick-cloudflared"

  if ! command -v cloudflared >/dev/null 2>&1; then
    warn "cloudflared not installed."
    if ! ask_yn "Install cloudflared via the Cloudflare apt repo?" "y"; then
      warn "cloudflared install skipped; falling back to manual."
      TUNNEL_KIND="manual"
      setup_manual_tunnel
      return 0
    fi
    if [[ "$PKG_MGR" != "apt-get" ]]; then
      warn "auto-install only wired for apt; download from https://github.com/cloudflare/cloudflared/releases and re-run."
      TUNNEL_KIND="manual"
      setup_manual_tunnel
      return 0
    fi
    run bash -c '
          set -e
          mkdir -p --mode=0755 /usr/share/keyrings
          curl -fsSL https://pkg.cloudflare.com/cloudflare-main.gpg | tee /usr/share/keyrings/cloudflare-main.gpg >/dev/null
          echo "deb [signed-by=/usr/share/keyrings/cloudflare-main.gpg] https://pkg.cloudflare.com/cloudflared $(lsb_release -cs 2>/dev/null || echo bookworm) main" \
            | tee /etc/apt/sources.list.d/cloudflared.list
          apt-get update -y && apt-get install -y cloudflared
        '
    # A dry run installs nothing, so only a real run can be verified.
    if (( FLAG_DRY_RUN == 0 )) && ! command -v cloudflared >/dev/null 2>&1; then
      warn "cloudflared still not found after install; falling back to manual."
      TUNNEL_KIND="manual"
      setup_manual_tunnel
      return 0
    fi
  fi

  if (( FLAG_DRY_RUN == 1 )); then
    info "[dry-run] would install systemd unit llmdeal-consortium-tunnel.service"
    info "[dry-run] would parse stdout for https://*.trycloudflare.com URL"
    TUNNEL_HOSTNAME="https://dryrun-example.trycloudflare.com"
    return 0
  fi

  # systemd needs an absolute path in ExecStart. Resolve where cloudflared
  # really lives (manual installs often use /usr/local/bin) and keep the
  # previous hard-coded location only as a last resort.
  local cloudflared_bin
  cloudflared_bin="$(type -P cloudflared || true)"
  cloudflared_bin="${cloudflared_bin:-/usr/bin/cloudflared}"

  # Write a systemd unit that runs `cloudflared tunnel --url http://localhost:PORT`
  # in foreground, journaled, restarting on failure.
  local unit=/etc/systemd/system/llmdeal-consortium-tunnel.service
  cat >"$unit" <<EOF_TUN
[Unit]
Description=llmdeal consortium quick cloudflared tunnel
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=${cloudflared_bin} tunnel --no-autoupdate --url http://localhost:${TUNNEL_LOCAL_PORT}
Restart=on-failure
RestartSec=5
# cloudflared prints the trycloudflare URL to stderr; capture both into the journal.
StandardOutput=journal
StandardError=journal

# Hardening - tunnel binary needs no special privileges.
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true

[Install]
WantedBy=multi-user.target
EOF_TUN
  chmod 0644 "$unit"
  ok "wrote $unit"

  run systemctl daemon-reload
  run systemctl enable --now llmdeal-consortium-tunnel.service

  # cloudflared usually prints the URL within 5-10 seconds. Poll the journal
  # every 2 s, up to 30 times (~60 s in total), until the URL shows up.
  info "waiting for cloudflared to assign a public URL..."
  local url=""
  local attempt
  for ((attempt = 1; attempt <= 30; attempt++)); do
    sleep 2
    # Take the most recent match; "|| true" keeps a non-matching grep from
    # failing the pipeline under pipefail.
    url="$(journalctl -u llmdeal-consortium-tunnel.service --since '60 seconds ago' --no-pager 2>/dev/null \
      | grep -oE 'https://[a-z0-9-]+\.trycloudflare\.com' \
      | tail -n1 || true)"
    if [[ -n "$url" ]]; then
      break
    fi
  done

  if [[ -z "$url" ]]; then
    warn "could not detect a trycloudflare URL within 60s — check 'journalctl -u llmdeal-consortium-tunnel'"
    warn "you can re-run with --reconfigure once cloudflared stabilises, or paste the URL manually now."
    if (( FLAG_NON_INTERACTIVE == 0 )); then
      url="$(ask "Public URL (or leave blank to retry later)" "")"
    fi
  fi

  TUNNEL_HOSTNAME="${url:-}"
  if [[ -n "$TUNNEL_HOSTNAME" ]]; then
    ok "quick cloudflared URL: $TUNNEL_HOSTNAME"
    # Phone home the URL immediately so the control plane can route to us
    # before the heartbeat loop kicks in.
    publish_public_url
  else
    warn "no URL captured; the heartbeat loop will retry once cloudflared logs the URL."
  fi
}

# -----------------------------------------------------------------------------
# publish_public_url - POST /api/consortium/heartbeat with the URL so the
# control plane learns about it without waiting for the systemd heartbeat
# loop's first tick.
# -----------------------------------------------------------------------------
# Report the tunnel's public URL to the control plane right away.
# Globals: TUNNEL_HOSTNAME, TOKEN, CONTROL_PLANE_BASE (read)
# Returns: 0; does nothing when the URL or the token is empty. A failed POST
#          is only warned about.
publish_public_url() {
  [[ -n "${TUNNEL_HOSTNAME:-}" ]] || return 0
  [[ -n "${TOKEN:-}" ]] || return 0

  local body http_code
  body=$(jq -n \
    --arg token "$TOKEN" \
    --arg public_url "$TUNNEL_HOSTNAME" \
    '{token:$token, public_url:$public_url}')

  # The body carries the token, so it goes through stdin rather than argv
  # (argv is visible to every local user via ps). On failure the captured
  # value is replaced with "000": curl already printed its own "000" through
  # -w, and appending a second one would report "HTTP 000000".
  http_code="$(printf '%s' "$body" | curl -sS -m 10 -o /dev/null -w '%{http_code}' \
    -X POST \
    -H 'Content-Type: application/json' \
    --data-binary @- \
    "${CONTROL_PLANE_BASE}/api/consortium/heartbeat")" || http_code="000"
  http_code="${http_code:-000}"

  if [[ "$http_code" == "200" ]]; then
    ok "control plane learned tunnel URL"
  else
    warn "failed to publish tunnel URL to control plane (HTTP ${http_code}); heartbeat loop will retry."
  fi
}

# Prepare a named Cloudflare tunnel: make sure cloudflared is available, show
# the account-side steps and write a config template.
# Globals: PKG_MGR, MACHINE_ID, INFER_URL, FLAG_NON_INTERACTIVE, FLAG_DRY_RUN
#          (read); TUNNEL_KIND, TUNNEL_HOSTNAME (written)
# Falls back to setup_manual_tunnel when cloudflared is not available.
setup_cloudflared() {
  TUNNEL_KIND="cloudflared"
  if ! command -v cloudflared >/dev/null 2>&1; then
    warn "cloudflared not installed."
    if ask_yn "Install cloudflared via the Cloudflare apt repo?" "y"; then
      if [[ "$PKG_MGR" == "apt-get" ]]; then
        run bash -c '
          set -e
          mkdir -p --mode=0755 /usr/share/keyrings
          curl -fsSL https://pkg.cloudflare.com/cloudflare-main.gpg | tee /usr/share/keyrings/cloudflare-main.gpg >/dev/null
          echo "deb [signed-by=/usr/share/keyrings/cloudflare-main.gpg] https://pkg.cloudflare.com/cloudflared $(lsb_release -cs 2>/dev/null || echo bookworm) main" \
            | tee /etc/apt/sources.list.d/cloudflared.list
          apt-get update -y && apt-get install -y cloudflared
        '
      else
        warn "auto-install only wired for apt; download from https://github.com/cloudflare/cloudflared/releases"
        TUNNEL_KIND="manual"
        # Same as every other fallback: show the manual instructions instead
        # of returning silently with nothing configured.
        setup_manual_tunnel
        return 0
      fi
    else
      warn "cloudflared install skipped; falling back to manual."
      TUNNEL_KIND="manual"
      setup_manual_tunnel
      return 0
    fi
  fi

  cat <<EOF

  Cloudflare tunnel setup (you'll need a Cloudflare account):

    1. Run interactively (browser flow):
         cloudflared tunnel login
         cloudflared tunnel create llmdeal-consortium-${MACHINE_ID}
         cloudflared tunnel route dns llmdeal-consortium-${MACHINE_ID} \\
             ${MACHINE_ID}.llmdeal-consortium.example.com

    2. Generate config (this script will write a template to
       /etc/cloudflared/llmdeal-consortium.yml).

EOF
  # Placeholder hostname; interactive runs let the operator replace it.
  TUNNEL_HOSTNAME="${MACHINE_ID}.llmdeal-consortium.example.com"
  if ! (( FLAG_NON_INTERACTIVE == 1 )); then
    local h
    h="$(ask "Tunnel hostname (after you configure DNS)" "$TUNNEL_HOSTNAME")"
    TUNNEL_HOSTNAME="$h"
  fi

  local cfg=/etc/cloudflared/llmdeal-consortium.yml
  if (( FLAG_DRY_RUN == 0 )); then
    run mkdir -p /etc/cloudflared
    cat >"$cfg" <<EOF_CFG
# llmdeal consortium cloudflared config (template - edit tunnel id after login)
tunnel: llmdeal-consortium-${MACHINE_ID}
credentials-file: /etc/cloudflared/llmdeal-consortium-credentials.json

ingress:
  - hostname: ${TUNNEL_HOSTNAME}
    service: ${INFER_URL}
  - service: http_status:404
EOF_CFG
    chmod 0640 "$cfg"
    ok "wrote $cfg"
  else
    info "[dry-run] would write $cfg"
  fi

  info "after 'cloudflared tunnel login + create', start with:"
  info "  systemctl enable --now cloudflared"
}

# Select the reverse-SSH tunnel and explain what the operator has to provide.
# Globals: TUNNEL_LOCAL_PORT (read); TUNNEL_KIND, TUNNEL_HOSTNAME (written)
# Falls back to setup_manual_tunnel when autossh is absent and cannot be
# installed.
setup_autossh() {
  TUNNEL_KIND="autossh"

  # Install autossh only when it is missing; give up on a failed install.
  if ! command -v autossh >/dev/null 2>&1 && ! install_pkg autossh; then
    warn "autossh missing; falling back to manual."
    TUNNEL_KIND="manual"
    setup_manual_tunnel
    return 0
  fi

  cat <<AUTOSSH_HELP

  Reverse SSH tunnel setup. The control plane will assign you a port on
  tunnel.llmdeal.me; the operator DMs you an SSH key fingerprint and host
  key separately. Once you have those:

    1. Place the operator-provided private key at ~/.ssh/llmdeal-tunnel
       (chmod 0600).
    2. The systemd unit (step 6) will start autossh with:

       autossh -M 0 -N -T \\
         -o ServerAliveInterval=30 -o ServerAliveCountMax=3 \\
         -o ExitOnForwardFailure=yes \\
         -i ~/.ssh/llmdeal-tunnel \\
         -R 0:127.0.0.1:${TUNNEL_LOCAL_PORT} \\
         consortium@tunnel.llmdeal.me

  This script saves the tunnel parameters into the config and lets the
  heartbeat loop attempt the connection. If tunnel.llmdeal.me is not yet
  online, this falls back to "manual"; the operator will email you when
  the SSH bastion is live.

AUTOSSH_HELP
  TUNNEL_HOSTNAME="tunnel.llmdeal.me"
}

# Manual tunnel option: record the choice and tell the operator what to do.
#
# Globals written: TUNNEL_KIND ("manual").
# Globals read:    INFER_URL, CONFIG_FILE (both only interpolated into the
#                  instructions printed on stdout).
setup_manual_tunnel() {
  TUNNEL_KIND="manual"

  # One argument per output line; the empty strings are the blank lines that
  # frame the instruction block.
  printf '%s\n' \
    "" \
    "  Manual mode: you must expose ${INFER_URL} to the public internet on" \
    "  your own. Common patterns:" \
    "    - nginx + Let's Encrypt + your domain" \
    "    - ngrok / tailscale funnel" \
    "    - port forward + dynamic DNS" \
    "" \
    "  When the URL is live, edit ${CONFIG_FILE} and set:" \
    "    PUBLIC_INFERENCE_URL=https://your-host.example.com" \
    "  then 'systemctl restart llmdeal-consortium'." \
    ""
}

# -----------------------------------------------------------------------------
# Step 6: systemd unit + heartbeat
# -----------------------------------------------------------------------------
# Generate the heartbeat daemon script at HEARTBEAT_SCRIPT (mode 0755).
#
# Globals read: FLAG_DRY_RUN, HEARTBEAT_SCRIPT, FLAG_CONFIG_PATH, CONFIG_FILE.
# In dry-run mode nothing is written.
#
# Notes on the generated script:
#   - The config path is baked in at install time and is the same path
#     write_config writes to (FLAG_CONFIG_PATH, falling back to CONFIG_FILE),
#     so a custom config location keeps working.
#   - The consortium token never appears on a command line: jq reads it from
#     its environment and curl receives the JSON body on stdin. Process
#     arguments are visible to every local user via ps(1), while the config
#     file holding the token is mode 0600.
#   - The alias CSV is split inside jq instead of via an unquoted array
#     expansion, so aliases are never subject to shell globbing; empty
#     entries (e.g. from a trailing comma) are dropped.
write_heartbeat_script() {
  if (( FLAG_DRY_RUN == 1 )); then
    info "[dry-run] would write $HEARTBEAT_SCRIPT"
    return 0
  fi

  local cfg_path="${FLAG_CONFIG_PATH:-$CONFIG_FILE}"
  # The service does not run from the installer's working directory, so a
  # relative path has to be anchored before it is embedded.
  [[ "$cfg_path" == /* ]] || cfg_path="${PWD}/${cfg_path}"

  {
    cat <<'HEARTBEAT_HEAD'
#!/usr/bin/env bash
# llmdeal-consortium-heartbeat.sh
# Posts a heartbeat to the llmdeal control plane every HEARTBEAT_INTERVAL_SEC.
set -euo pipefail

# Location of the consortium config (baked in by consortium.sh at install time).
HEARTBEAT_HEAD
    # %q shell-quotes the path so spaces or metacharacters survive intact.
    printf 'config_file=%q\n' "$cfg_path"
    cat <<'HEARTBEAT_EOF'

# shellcheck disable=SC1090
source "$config_file"

: "${CONTROL_PLANE_BASE:=https://llmdeal.me}"
: "${HEARTBEAT_INTERVAL_SEC:=60}"
: "${CONSORTIUM_TOKEN:?token missing in $config_file}"

cleanup() { exit 0; }
trap cleanup TERM INT

# Build the models array from CONSORTIUM_MODEL_ALIASES (csv). Splitting is
# done inside jq so alias text is never word-split or globbed by the shell.
models_json="[]"
if [[ -n "${CONSORTIUM_MODEL_ALIASES:-}" ]]; then
  models_json=$(jq -cn --arg csv "$CONSORTIUM_MODEL_ALIASES" \
    '$csv | split(",") | map(select(length > 0) | {alias: ., health: "ok"})')
fi

while true; do
  gpu_util="null"
  if command -v nvidia-smi >/dev/null 2>&1; then
    gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits 2>/dev/null | \
      head -n1 | tr -dc 0-9 || echo "")
    [[ -z "$gpu_util" ]] && gpu_util="null"
  fi

  # If the quick-cloudflared tunnel is alive, re-derive its public URL on
  # every loop so the control plane learns the freshly-assigned trycloudflare
  # URL after a tunnel restart. Falls back to whatever was captured at setup
  # time (CONSORTIUM_TUNNEL_HOSTNAME) when journalctl is unavailable.
  public_url=""
  if [[ "${CONSORTIUM_TUNNEL_KIND:-}" == "quick-cloudflared" ]]; then
    public_url="$(journalctl -u llmdeal-consortium-tunnel.service --since '5 minutes ago' --no-pager 2>/dev/null \
      | grep -oE 'https://[a-z0-9-]+\.trycloudflare\.com' \
      | tail -n1 || true)"
  fi
  if [[ -z "$public_url" ]] && [[ -n "${CONSORTIUM_TUNNEL_HOSTNAME:-}" ]]; then
    public_url="${CONSORTIUM_TUNNEL_HOSTNAME}"
  fi

  # The token reaches jq through its environment (env.CONSORTIUM_TOKEN), not
  # through argv. public_url is only added to the body when it is non-empty.
  body=$(CONSORTIUM_TOKEN="$CONSORTIUM_TOKEN" jq -n \
    --argjson models "$models_json" \
    --argjson gpu_util "$gpu_util" \
    --arg last_request_iso "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg public_url "$public_url" \
    '{token: env.CONSORTIUM_TOKEN, models: $models, gpu_util: $gpu_util, last_request_iso: $last_request_iso}
     + (if $public_url == "" then {} else {public_url: $public_url} end)')

  # Body goes over stdin (--data-binary @-) so the token is not in curl's argv.
  # A failed heartbeat is deliberately ignored; the next loop retries.
  printf '%s' "$body" | curl -sS -m 10 -X POST \
    -H 'Content-Type: application/json' \
    --data-binary @- \
    "${CONTROL_PLANE_BASE}/api/consortium/heartbeat" >/dev/null 2>&1 || true

  sleep "$HEARTBEAT_INTERVAL_SEC"
done
HEARTBEAT_EOF
  } >"$HEARTBEAT_SCRIPT"

  chmod 0755 "$HEARTBEAT_SCRIPT"
  ok "wrote $HEARTBEAT_SCRIPT"
}

# Write the consortium env file (token, machine id, aliases, tunnel and
# inference settings) to FLAG_CONFIG_PATH, or CONFIG_FILE when that is empty.
#
# Globals read: FLAG_CONFIG_PATH, CONFIG_FILE, FLAG_DRY_RUN, SCRIPT_VERSION,
#               SELECTED_ALIASES, TOKEN, MACHINE_ID, TUNNEL_KIND,
#               TUNNEL_HOSTNAME, INFER_KIND, INFER_URL, CONTROL_PLANE_BASE.
# Returns: 0 on success (or dry-run); 1 if the temp file cannot be written
#          or moved into place (the temp file is removed first).
write_config() {
  local target="${FLAG_CONFIG_PATH:-$CONFIG_FILE}"
  if (( FLAG_DRY_RUN == 1 )); then
    info "[dry-run] would write $target"
    return 0
  fi
  run mkdir -p "$(dirname "$target")"

  # Build CSV of aliases.
  local aliases_csv=""
  if (( ${#SELECTED_ALIASES[@]} > 0 )); then
    aliases_csv="$(IFS=','; printf '%s' "${SELECTED_ALIASES[*]}")"
  fi

  # Stage the file NEXT TO the target, not in the default temp dir: rename(2)
  # is only atomic within one filesystem, and the default temp dir is often a
  # separate mount, which would turn mv into a non-atomic copy + unlink.
  # mktemp creates the file with mode 0600, so the token is never readable
  # by other users while staged.
  local tmp
  tmp="$(mktemp "${target}.XXXXXX")"
  if ! cat >"$tmp" <<EOF_CFG
# llmdeal consortium configuration - generated by consortium.sh v${SCRIPT_VERSION}
# Mode 0600, owner root. Do not commit this file.
#
# Touching CONSORTIUM_TOKEN requires re-running setup with --reconfigure.
CONSORTIUM_TOKEN=${TOKEN}
CONSORTIUM_MACHINE_ID=${MACHINE_ID}
CONSORTIUM_MODEL_ALIASES=${aliases_csv}
CONSORTIUM_TUNNEL_KIND=${TUNNEL_KIND}
CONSORTIUM_TUNNEL_HOSTNAME=${TUNNEL_HOSTNAME:-}
INFERENCE_KIND=${INFER_KIND}
INFERENCE_URL=${INFER_URL}
CONTROL_PLANE_BASE=${CONTROL_PLANE_BASE}
HEARTBEAT_INTERVAL_SEC=60
EOF_CFG
  then
    # Do not leave a partial file containing the token behind.
    rm -f -- "$tmp"
    err "could not write temporary config next to $target"
    return 1
  fi

  chmod 0600 "$tmp"
  # Best effort: chown fails harmlessly when not running as root.
  chown root:root "$tmp" 2>/dev/null || true

  if ! mv -f -- "$tmp" "$target"; then
    rm -f -- "$tmp"
    err "could not move config into place at $target"
    return 1
  fi
  ok "wrote $target (mode 0600)"
}

# Write the llmdeal-consortium systemd unit to SYSTEMD_UNIT (mode 0644).
#
# Globals read: FLAG_DRY_RUN, SYSTEMD_UNIT, HEARTBEAT_SCRIPT,
#               FLAG_CONFIG_PATH, CONFIG_FILE.
# In dry-run mode nothing is written.
#
# EnvironmentFile points at the same path write_config writes to
# (FLAG_CONFIG_PATH, falling back to CONFIG_FILE); otherwise a custom config
# location would leave the unit referencing a file that was never created.
write_systemd_unit() {
  if (( FLAG_DRY_RUN == 1 )); then
    info "[dry-run] would write $SYSTEMD_UNIT"
    return 0
  fi

  local env_file="${FLAG_CONFIG_PATH:-$CONFIG_FILE}"
  # systemd only accepts absolute paths for EnvironmentFile=.
  [[ "$env_file" == /* ]] || env_file="${PWD}/${env_file}"

  cat >"$SYSTEMD_UNIT" <<EOF_UNIT
[Unit]
Description=llmdeal consortium heartbeat + tunnel keepalive
Documentation=https://llmdeal.me/setup/consortium-setup.md
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
EnvironmentFile=${env_file}
ExecStart=${HEARTBEAT_SCRIPT}
Restart=on-failure
RestartSec=10
# Don't leak the token into journal.
StandardOutput=null
StandardError=journal
# Hardening.
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
ReadWritePaths=/var/log

[Install]
WantedBy=multi-user.target
EOF_UNIT
  chmod 0644 "$SYSTEMD_UNIT"
  ok "wrote $SYSTEMD_UNIT"
}

# Step 6: write config, heartbeat script and unit file, then (re)start the
# service and report whether it came up.
#
# Globals read: FLAG_DRY_RUN.
step_systemd() {
  section "Step 6 / 8 - systemd unit + heartbeat"

  write_config
  write_heartbeat_script
  write_systemd_unit

  run systemctl daemon-reload
  # enable + restart rather than "enable --now": starting an already-active
  # unit is a no-op, so on a re-run the old process would keep the previous
  # config and heartbeat script. restart also starts a stopped unit.
  run systemctl enable llmdeal-consortium.service
  run systemctl restart llmdeal-consortium.service

  if (( FLAG_DRY_RUN == 0 )); then
    # Give the unit a moment to settle (or crash) before checking it.
    sleep 2
    if systemctl is-active --quiet llmdeal-consortium.service; then
      ok "llmdeal-consortium.service active"
    else
      warn "llmdeal-consortium.service not active; check 'journalctl -u llmdeal-consortium -n 50'"
    fi
  fi
}

# -----------------------------------------------------------------------------
# Step 7: smoke test
# -----------------------------------------------------------------------------
# Step 7: probe the local inference server with the first selected model,
# then ask the control plane to reach this machine through the tunnel.
#
# Globals read: SELECTED_ALIASES, SELECTED_MODELS, FLAG_DRY_RUN,
#               FLAG_NON_INTERACTIVE, INFER_KIND, INFER_URL, TOKEN,
#               CONTROL_PLANE_BASE.
# Returns: 0 in every non-fatal outcome (success, skipped, or control-plane
#          failure the operator chose not to retry), so that `set -e` in the
#          caller does not abort setup. A failed local probe that the
#          operator declines to ignore ends in fatal.
#
# Retries are a bounded loop (max_attempts) and are not offered in
# non-interactive mode. NOTE(review): ask_yn is not visible here; if it
# auto-answers with its default when unattended, an unbounded retry would
# never terminate - confirm.
step_smoke_test() {
  section "Step 7 / 8 - Smoke test"

  if (( ${#SELECTED_ALIASES[@]} == 0 )); then
    warn "no models selected; skipping smoke test."
    return 0
  fi

  if (( FLAG_DRY_RUN == 1 )); then
    info "[dry-run] would probe ${INFER_URL} + POST /api/consortium/smoke-test"
    return 0
  fi

  local -r max_attempts=5
  local attempt=0
  local first_raw="${SELECTED_MODELS[0]}"
  local first_alias="${SELECTED_ALIASES[0]}"
  local probe_path probe_payload probe_resp probe_code
  local body resp http_code latency note

  while true; do
    attempt=$(( attempt + 1 ))

    # --- Local probe ------------------------------------------------------
    probe_path=""
    probe_payload=""
    case "$INFER_KIND" in
      vllm|tgi|llamacpp|manual)
        probe_path="/v1/chat/completions"
        probe_payload=$(jq -n --arg m "$first_raw" \
          '{model:$m, messages:[{role:"user", content:"ping"}], max_tokens:1, stream:false}')
        ;;
      ollama)
        probe_path="/api/generate"
        probe_payload=$(jq -n --arg m "$first_raw" \
          '{model:$m, prompt:"ping", stream:false, options:{num_predict:1}}')
        ;;
      *)
        # Without a default arm the probe variables stay unset and trip
        # `set -u` further down.
        warn "unknown inference kind '${INFER_KIND}'; skipping local probe."
        ;;
    esac

    if [[ -n "$probe_path" ]]; then
      probe_resp="$(curl -sS -m 30 -X POST \
        -H 'Content-Type: application/json' \
        -w '\n__HTTP_%{http_code}__' \
        -d "$probe_payload" \
        "${INFER_URL}${probe_path}" || true)"
      # `|| true`: under pipefail a response without the marker would make
      # grep fail and abort the script before the error can be reported.
      probe_code="$(printf '%s' "$probe_resp" \
        | grep -oE '__HTTP_[0-9]+__' | tail -n1 | tr -dc 0-9 || true)"

      if [[ "$probe_code" == "200" ]]; then
        ok "local probe to ${INFER_URL} returned 200."
      else
        err "local inference probe failed (HTTP ${probe_code:-???})."
        err "is ${INFER_URL} actually serving '${first_raw}'?"
        if ask_yn "Continue anyway?" "n"; then
          info "continuing; the model alias '${first_alias}' will not work until you fix the local server."
        else
          fatal "local probe failed; fix the inference server, then re-run."
        fi
      fi
    fi

    # --- Control-plane smoke test ------------------------------------------
    body=$(jq -n --arg token "$TOKEN" --arg alias "$first_alias" \
      '{token:$token, model_alias:$alias}')
    resp="$(curl -sS -m 30 -X POST \
      -H 'Content-Type: application/json' \
      -w '\n__HTTP_%{http_code}__' \
      -d "$body" \
      "${CONTROL_PLANE_BASE}/api/consortium/smoke-test" || true)"
    http_code="$(printf '%s' "$resp" \
      | grep -oE '__HTTP_[0-9]+__' | tail -n1 | tr -dc 0-9 || true)"
    resp="$(printf '%s' "$resp" | sed -E 's/__HTTP_[0-9]+__$//')"

    if [[ "$http_code" == "200" ]]; then
      # A 200 with a non-JSON body must not abort setup; fall back to "?".
      latency="$(printf '%s' "$resp" | jq -r '.latency_ms // 0' 2>/dev/null || true)"
      note="$(printf '%s' "$resp" | jq -r '.note // empty' 2>/dev/null || true)"
      ok "control plane confirmed reachability (${latency:-?}ms)"
      # Use a full `if`: a trailing `[[ ... ]] && cmd` would make this
      # function return 1 when there is no note and kill the script.
      if [[ -n "$note" ]]; then
        info "note: $note"
      fi
      return 0
    fi

    err "control-plane smoke test failed (HTTP ${http_code:-???})"
    err "body: $(printf '%s' "$resp" | head -c 400)"
    warn "diagnostics:"
    warn "  - is the tunnel up? (check 'systemctl status cloudflared' or your autossh)"
    warn "  - did you push your tunnel hostname to DNS yet?"
    warn "  - did the operator activate your machine on the control plane?"

    if (( ${FLAG_NON_INTERACTIVE:-0} == 1 )); then
      warn "non-interactive mode; not retrying the smoke test."
      return 0
    fi
    if (( attempt >= max_attempts )); then
      warn "giving up after ${max_attempts} smoke-test attempts."
      return 0
    fi
    if ! ask_yn "Retry now?" "y"; then
      return 0
    fi
  done
}

# -----------------------------------------------------------------------------
# Step 8: summary
# -----------------------------------------------------------------------------
step_summary() {
  section "Step 8 / 8 - Summary"

  local aliases_csv=""
  if (( ${#SELECTED_ALIASES[@]} > 0 )); then
    aliases_csv="$(IFS=','; echo "${SELECTED_ALIASES[*]}")"
  fi

  cat <<EOF

${C_GREEN}${C_BOLD}  You are live.${C_RESET}

  Machine ID:       ${MACHINE_ID}
  Models served:    ${aliases_csv:-<none>}
  Inference server: ${INFER_KIND} @ ${INFER_URL}
  Tunnel:           ${TUNNEL_KIND}${TUNNEL_HOSTNAME:+ - $TUNNEL_HOSTNAME}
  Heartbeat:        every 60s -> ${CONTROL_PLANE_BASE}/api/consortium/heartbeat

  Monitoring:
    https://llmdeal.me/consortium/m/${MACHINE_ID}
    (this URL may not be live yet - the operator-side dashboard ships next.)

  Day-to-day commands:
    systemctl status  llmdeal-consortium       # health
    journalctl -u     llmdeal-consortium -f    # live logs
    systemctl restart llmdeal-consortium       # restart after config edits
    systemctl disable --now llmdeal-consortium # leave the consortium

  Config file (mode 0600, contains your token - never share):
    ${FLAG_CONFIG_PATH:-$CONFIG_FILE}

  Re-run this script with --reconfigure to change models/tunnel later.

  Questions / problems: DM the llmdeal operator with your machine_id.

EOF
}

# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
# Entry point: parse CLI arguments, prepare logging, show the banner, insist
# on root, then run the eight onboarding steps strictly in order.
#
# Arguments: "$@" - forwarded unchanged to parse_args.
main() {
  parse_args "$@"
  _ensure_log
  banner

  require_root

  # Steps 1-8, in execution order.
  local -a pipeline=(
    step_prereqs
    step_identity
    step_detect_server
    step_models
    step_tunnel
    step_systemd
    step_smoke_test
    step_summary
  )
  local step_fn
  for step_fn in "${pipeline[@]}"; do
    "$step_fn"
  done

  ok "setup complete."
}

# Script entry: hand every command-line argument to main, each as its own word.
main "$@"
