diff --git a/.github/workflows/install-sh.yml b/.github/workflows/install-sh.yml new file mode 100644 index 0000000..ea6c95e --- /dev/null +++ b/.github/workflows/install-sh.yml @@ -0,0 +1,111 @@ +name: install.sh + +on: + push: + branches: + - main + paths: + - install.sh + - .github/workflows/install-sh.yml + pull_request: + paths: + - install.sh + - .github/workflows/install-sh.yml + +permissions: + contents: read + +jobs: + lint: + name: shellcheck + parse + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - name: shellcheck + run: shellcheck -s sh install.sh + - name: sh parse + run: sh -n install.sh + - name: bash parse + run: bash -n install.sh + + smoke: + name: smoke test (ubuntu) + runs-on: ubuntu-latest + needs: lint + steps: + - uses: actions/checkout@v6 + + - name: run in ubuntu:22.04 container + run: | + docker run --rm -v "$PWD":/w:ro ubuntu:22.04 bash -c ' + set -eu + export DEBIAN_FRONTEND=noninteractive + apt-get update -qq + apt-get install -y -qq curl tar ca-certificates >/dev/null + + echo "::group::--help" + bash /w/install.sh --help | head + echo "::endgroup::" + + echo "::group::install --no-service @ v0.0.5" + bash /w/install.sh --no-service --version v0.0.5 + test -x /usr/local/bin/flashduty-runner + /usr/local/bin/flashduty-runner version | grep -q "0.0.5" + echo "::endgroup::" + + echo "::group::no-op re-run" + bash /w/install.sh --no-service --version v0.0.5 2>&1 | tee /tmp/out + grep -q "Already at v0.0.5" /tmp/out + echo "::endgroup::" + + echo "::group::full install with TOKEN env var" + bash /w/install.sh --uninstall >/dev/null + TOKEN="wnt_ci_test" bash /w/install.sh --version v0.0.5 + test -f /etc/flashduty-runner/env + grep -q "FLASHDUTY_RUNNER_TOKEN=wnt_ci_test" /etc/flashduty-runner/env + id flashduty + test -d /var/lib/flashduty-runner/workspace + echo "::endgroup::" + + echo "::group::env file preserved across update" + echo "# CI marker" >> /etc/flashduty-runner/env + sum_before=$(sha256sum 
/etc/flashduty-runner/env | awk "{print \$1}") + bash /w/install.sh --version v0.0.5 >/dev/null + sum_after=$(sha256sum /etc/flashduty-runner/env | awk "{print \$1}") + [ "$sum_before" = "$sum_after" ] + echo "::endgroup::" + + echo "::group::uninstall keeps config" + bash /w/install.sh --uninstall + test ! -e /usr/local/bin/flashduty-runner + test -f /etc/flashduty-runner/env + echo "::endgroup::" + + echo "::group::purge" + bash /w/install.sh --purge + test ! -d /etc/flashduty-runner + test ! -d /var/lib/flashduty-runner + if id flashduty >/dev/null 2>&1; then echo "flashduty user still exists after --purge" >&2; exit 1; fi # "!"-negated commands are exempt from set -e, so assert explicitly + echo "::endgroup::" + + echo "::group::non-tty without TOKEN exits 6" + set +e + bash /w/install.sh --version v0.0.5 </dev/null >/tmp/err 2>&1 + rc=$? + set -e + [ "$rc" = "6" ] + grep -q "Token is required" /tmp/err + echo "::endgroup::" + + echo "::group::nonexistent version exits 5" + set +e + bash /w/install.sh --no-service --version v99.99.99 >/tmp/err2 2>&1 + rc=$? + set -e + [ "$rc" = "5" ] + grep -q "Failed to download" /tmp/err2 + echo "::endgroup::" + + echo "" + echo "ALL CHECKS PASSED" + ' diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..77c5dc9 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,55 @@ +# CLAUDE.md + +**Flashduty Runner** — lightweight agent that connects over WebSocket (TLS) to the fc-safari AI-SRE platform, receives task requests, executes them on the host, and streams results back. Used by end-users to grant Safari access to their servers. + +Not a web service — no HTTP API, no pgy registration. 
+ +## Repo-specific + +| Field | Value | +|---|---| +| Language | Go (Cobra CLI) | +| Default upstream | `wss://api.flashcat.cloud/safari/worknode/ws` (override with `--url`) | +| Auth | token `wnt_…` from Safari (env `FLASHDUTY_RUNNER_TOKEN` or `--token`) | +| Build | `make build` / `make build-all` (linux+darwin × amd64+arm64) | +| Test / lint / fmt | `make test` / `make lint` / `make fmt` | +| Install tools | `make tools` | +| Docker / install target | `make install` | + +## Architecture + +| Dir | Role | +|---|---| +| `cmd/` | CLI — `run`, `version` | +| `ws/` | WebSocket client — reconnect, heartbeat | +| `workspace/` | Sandbox for file ops; symlink-escape protected | +| `permission/` | Glob-based command whitelist/blacklist | +| `protocol/` | Message types (`task.request`, `task.output`, `task.result`, `mcp.call`, `mcp.result`, heartbeat) | +| `mcp/` | MCP protocol layer for tool calls routed from Safari | + +## Permission modes + +Controlled via flags / YAML in `/etc/flashduty-runner/`: + +- **Strict** (default) — whitelist-only. +- **Trust** — allow everything, block only catastrophic patterns (e.g. `rm -rf /`). +- **Read-only** — `cat` / `head` / `ls` / `grep` / `ps` / `df` / … + +Last-match-wins glob ordering. Treat the permission layer as a security boundary — never bypass it to "make a test pass". + +## Environment variables + +| Var | Purpose | +|---|---| +| `FLASHDUTY_RUNNER_TOKEN` | Required. Auth token issued by Safari. | +| `FLASHDUTY_RUNNER_URL` | Override upstream WebSocket URL | +| `FLASHDUTY_RUNNER_WORKSPACE` | Sandbox root | +| `FLASHDUTY_RUNNER_LOG_LEVEL` | `debug` / `info` / `warn` / `error` | + +## Relationship with fc-safari + +Every change to the protocol (new message types, auth handshake, capability negotiation) needs matching changes in fc-safari's worknode handler. Search `fc-safari` for `worknode` / `protocol` usage before modifying. + +## Shared doc + +`@~/.claude/flashcat-dev.md` covers Go env + code style. 
DB / pgy sections do not apply. diff --git a/README.md b/README.md index bec15f3..d384f76 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,28 @@ permission: ## Quick Start -### Binary Installation +### One-line Install / Update (Linux + macOS) + +```bash +# Install or update (prompts for token if not already set) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash + +# Non-interactive (pass token on the sudo line) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo TOKEN=wnt_xxx bash + +# Pin a specific version +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo VERSION=v0.0.5 bash + +# Uninstall (keeps /etc/flashduty-runner/ config) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash -s -- --uninstall + +# Uninstall and wipe everything (binary, config, workspace, service user) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash -s -- --purge +``` + +On Linux with systemd the script also creates a `flashduty` service user, writes `/etc/flashduty-runner/env`, installs a hardened unit, and runs `systemctl enable --now`. On macOS and non-systemd Linux it installs the binary only. Run with `--help` for all flags. 
+ +### Manual Binary Installation ```bash # Linux (amd64) diff --git a/README_zh.md b/README_zh.md index 6276206..11a3ebf 100644 --- a/README_zh.md +++ b/README_zh.md @@ -98,7 +98,28 @@ permission: ## 快速开始 -### 二进制安装 +### 一键安装 / 升级(Linux + macOS) + +```bash +# 安装或升级(未配置 token 时会交互式提示输入) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash + +# 非交互式(在 sudo 行传入 token) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo TOKEN=wnt_xxx bash + +# 指定版本 +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo VERSION=v0.0.5 bash + +# 卸载(保留 /etc/flashduty-runner/ 配置) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash -s -- --uninstall + +# 彻底卸载(二进制、配置、工作区、服务用户一并删除) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash -s -- --purge +``` + +在带 systemd 的 Linux 上,脚本会创建 `flashduty` 系统用户、写入 `/etc/flashduty-runner/env`、安装加固过的 systemd 单元并执行 `systemctl enable --now`。macOS 和无 systemd 的 Linux 仅安装二进制。使用 `--help` 查看全部参数。 + +### 手动二进制安装 ```bash # Linux (amd64) diff --git a/cmd/main.go b/cmd/main.go index cfadc65..4872346 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -27,10 +27,11 @@ var ( // Command line flags var ( - flagToken string - flagURL string - flagWorkspace string - flagLogLevel string + flagToken string + flagURL string + flagWorkspace string + flagLogLevel string + flagMaxAttempts int ) // Default values @@ -89,6 +90,7 @@ Environment variables: cmd.Flags().StringVar(&flagURL, "url", "", "WebSocket endpoint URL (env: FLASHDUTY_RUNNER_URL)") cmd.Flags().StringVar(&flagWorkspace, "workspace", "", "Workspace root directory (env: FLASHDUTY_RUNNER_WORKSPACE)") cmd.Flags().StringVar(&flagLogLevel, "log-level", "", "Log level: debug, info, warn, error (env: FLASHDUTY_RUNNER_LOG_LEVEL)") + cmd.Flags().IntVar(&flagMaxAttempts, 
"max-attempts", -1, "Max reconnect attempts (0=unlimited, default=30, env: FLASHDUTY_RUNNER_MAX_ATTEMPTS)") return cmd } @@ -111,6 +113,7 @@ type Config struct { URL string WorkspaceRoot string LogLevel string + MaxAttempts int } func loadConfig() (*Config, error) { @@ -156,6 +159,19 @@ func loadConfig() (*Config, error) { cfg.LogLevel = defaultLogLevel } + // Max attempts: flag > env > default (30) + // -1 means flag wasn't set, so check env; 0 = unlimited + cfg.MaxAttempts = flagMaxAttempts + if cfg.MaxAttempts == -1 { + if envVal := os.Getenv("FLASHDUTY_RUNNER_MAX_ATTEMPTS"); envVal != "" { + if v, err := fmt.Sscanf(envVal, "%d", &cfg.MaxAttempts); v != 1 || err != nil { + return nil, fmt.Errorf("invalid FLASHDUTY_RUNNER_MAX_ATTEMPTS: %s", envVal) + } + } else { + cfg.MaxAttempts = ws.DefaultMaxReconnectAttempts + } + } + return cfg, nil } @@ -172,6 +188,7 @@ func runRunner() error { slog.Info("starting flashduty-runner", "version", Version, "workspace", cfg.WorkspaceRoot, + "max_attempts", cfg.MaxAttempts, ) checker := permission.NewChecker(map[string]string{"*": "allow"}) @@ -190,7 +207,7 @@ func runRunner() error { handler := ws.NewHandler(wspace) // Create WebSocket client - client := ws.NewClient(cfg.Token, cfg.URL, cfg.WorkspaceRoot, handler.Handle, Version) + client := ws.NewClient(cfg.Token, cfg.URL, cfg.WorkspaceRoot, handler.Handle, Version, cfg.MaxAttempts) handler.SetClient(client) // Setup signal handling diff --git a/docs/superpowers/specs/2026-04-23-install-script-design.md b/docs/superpowers/specs/2026-04-23-install-script-design.md new file mode 100644 index 0000000..b2d64f3 --- /dev/null +++ b/docs/superpowers/specs/2026-04-23-install-script-design.md @@ -0,0 +1,319 @@ +# install.sh — one-line installer/updater/uninstaller + +**Status:** approved **Date:** 2026-04-23 **Owner:** flashduty-runner + +## Goal + +Replace the manual 4-line-per-platform install block in the README with a single command that handles install, update, and uninstall for 
the flashduty-runner binary across Linux and macOS. + +## Non-goals + +- Windows installer (`install.ps1`) — separate future project. +- macOS launchd integration — docs hint only. +- Non-systemd Linux init systems (OpenRC, runit) — binary-only install is acceptable. +- Attestation / cosign signature verification — `checksums.txt` over HTTPS is the trust anchor. +- Distribution via apt / yum / brew — out of scope. + +## User-facing surface + +**Canonical URL:** `https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh` + +```bash +# Install or update (interactive token prompt if needed) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash + +# Non-interactive install (for automation) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh \ + | sudo TOKEN=wnt_xxx bash + +# Pin a specific version +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh \ + | sudo VERSION=v0.0.5 bash + +# Uninstall (keeps /etc/flashduty-runner/env) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh \ + | sudo bash -s -- --uninstall + +# Uninstall + wipe config and working dir +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh \ + | sudo bash -s -- --purge + +# Binary-only (no systemd unit even if systemd is present) +curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh \ + | sudo bash -s -- --no-service +``` + +### Environment variables (installer-local) + +Installer-local vars are unprefixed. The installer writes prefixed `FLASHDUTY_RUNNER_*` vars into `/etc/flashduty-runner/env`, which are the names the running binary reads at runtime. + +Because `sudo` scrubs env by default, these must be passed on the sudo command line (`sudo TOKEN=… bash`), not exported in the caller's shell. 
+ +| Var | Purpose | Default | +|---|---|---| +| `TOKEN` | Written as `FLASHDUTY_RUNNER_TOKEN=…` into env file on first install | prompt via `/dev/tty` if interactive, else exit 6 | +| `VERSION` | Pin a release tag (e.g. `v0.0.5`) | latest release | +| `URL` | Written as `FLASHDUTY_RUNNER_URL=…` into env file | `wss://api.flashcat.cloud/safari/environment/ws` | +| `INSTALL_DIR` | Binary install path | `/usr/local/bin` | +| `REPO` | GitHub owner/repo override (for forks / testing) | `flashcatcloud/flashduty-runner` | + +### Flags + +- `--uninstall` — remove binary and systemd unit, keep `/etc/flashduty-runner/`. +- `--purge` — implies `--uninstall`, also remove `/etc/flashduty-runner/`, `/var/lib/flashduty-runner/`, and the `flashduty` user. +- `--no-service` — skip systemd unit and service user; install binary only. +- `--token <token>` — flag equivalent of `TOKEN` env var. +- `--version <tag>` — flag equivalent of `VERSION` env var. +- `--help` — print usage and exit 0. + +### Exit codes + +| Code | Meaning | +|---|---| +| 0 | Success or no-op | +| 1 | Generic error | +| 2 | Unsupported platform (OS or arch) | +| 3 | Must be run as root | +| 4 | Checksum mismatch | +| 5 | Download failure | +| 6 | Missing token when service setup is required | + +## Internal structure + +Single POSIX `sh`-compatible file at repo root: `install.sh`. ~500 lines. No bashisms (runs under `dash`, `ash`, `bash`). 
+ +``` +install.sh +├── parse_args() +├── detect_platform() # OS=linux|darwin, ARCH_GR=x86_64|arm64 +├── detect_init() # INIT=systemd|none (skipped on darwin) +├── resolve_version() # $VERSION or latest via GitHub +├── download_and_verify() # tarball + checksums.txt, sha256 +├── install_binary() # atomic mv into place, keep .bak +├── ensure_user() # useradd -r flashduty (idempotent) +├── ensure_workdir() # /var/lib/flashduty-runner/workspace +├── ensure_token() # TOKEN / --token / /dev/tty prompt +├── write_env_file() # /etc/flashduty-runner/env (0600) +├── install_systemd_unit() +├── enable_and_start() +├── uninstall() +└── main() # dispatcher +``` + +### State on disk + +| Path | Owner/Mode | Purpose | +|---|---|---| +| `${INSTALL_DIR}/flashduty-runner` | root:root 0755 | Binary | +| `${INSTALL_DIR}/flashduty-runner.bak` | root:root 0755 | Previous binary (after updates) | +| `/etc/flashduty-runner/env` | root:root 0600 | Runtime env (token, URL, log level) | +| `/etc/systemd/system/flashduty-runner.service` | root:root 0644 | Unit file | +| `/var/lib/flashduty-runner/workspace` | flashduty:flashduty 0750 | Runtime workspace / sandbox root | +| `flashduty` system user | `-r -s /usr/sbin/nologin` | Runs the service | + +(Env file is `0600 root:root` because systemd reads `EnvironmentFile=` itself as PID 1 / root before spawning the service; the `flashduty` service user never reads the file directly.) + +### Logging + +All output on stderr via `info()` / `warn()` / `err()` helpers. Colored only when stderr is a TTY. stdout reserved. + +### Idempotency + +Every step checks current state first: +- User/group: skip if exists. +- Env file: never overwritten on re-install/update — load-bearing contract. +- Systemd unit: rewritten only if content differs (compared via sha). +- Binary: replaced only if version differs from resolved target. + +## Flows + +### Install (fresh machine) + +1. `parse_args` → mode=install. +2. Require root → exit 3 otherwise. +3. 
`detect_platform` → exit 2 on unsupported OS/arch. +4. `detect_init` → INIT=systemd|none. +5. `resolve_version` → TAG. +6. `download_and_verify`: + - `mkdir` tmp dir, `trap` cleanup. + - `curl -fsSL` tarball + `checksums.txt`. + - `grep -F` exact filename in checksums, compute `sha256sum`/`shasum -a 256`, compare → exit 4 on mismatch. + - `tar -xzf` → binary. +7. `install_binary` → `mv` to `${INSTALL_DIR}/flashduty-runner` (0755). +8. If `--no-service` or OS=darwin → print next-step hint, exit 0. +9. `ensure_user` → `useradd -r -s /usr/sbin/nologin flashduty` (idempotent). +10. `ensure_workdir` → mkdir, chown, chmod. +11. `ensure_token` → use `$TOKEN`, `--token`, or prompt via `/dev/tty` (if interactive) → exit 6 if still empty. +12. `write_env_file` → only if missing; preserve existing content on update. +13. If INIT=systemd: + - `install_systemd_unit` → render from template, write, `daemon-reload`. + - `systemctl enable --now flashduty-runner`. + - Print: *"Installed flashduty-runner vX.Y.Z. Check status: `systemctl status flashduty-runner`"*. +14. Else (INIT=none, Linux without systemd): + - Print: *"Systemd not detected. Binary installed at ${INSTALL_DIR}/flashduty-runner; start manually with `flashduty-runner run` or wire into your init system."* + +### Update (re-run, binary exists) + +1. Steps 1–5 same. +2. Run `${INSTALL_DIR}/flashduty-runner version` to read current version. +3. If current == TAG → log *"Already at vX.Y.Z"*, exit 0. +4. `download_and_verify` (same as install). +5. `systemctl stop` (if systemd and unit exists). +6. `mv ${INSTALL_DIR}/flashduty-runner ${INSTALL_DIR}/flashduty-runner.bak`. +7. `mv` new binary into place. +8. Env file untouched. Systemd unit rewritten only if content differs. +9. `systemctl start` (if service was previously enabled). +10. Verify active after 2 seconds; on failure log warning with `.bak` rollback hint (no auto-rollback). +11. Print *"Updated vA → vB"*. + +### Uninstall + +1. 
`parse_args` → mode=uninstall, `PURGE=true|false`. +2. Require root. +3. `systemctl stop flashduty-runner` — ignore errors. +4. `systemctl disable flashduty-runner` — ignore errors. +5. `rm -f /etc/systemd/system/flashduty-runner.service`. +6. `systemctl daemon-reload` — ignore errors. +7. `rm -f ${INSTALL_DIR}/flashduty-runner ${INSTALL_DIR}/flashduty-runner.bak`. +8. If `--purge`: + - `rm -rf /etc/flashduty-runner` + - `rm -rf /var/lib/flashduty-runner` + - `userdel flashduty` (ignore errors) +9. Print summary of what was removed. + +Every uninstall step logs and continues; a half-broken install should still clean up as much as possible. + +## Systemd unit + +`/etc/systemd/system/flashduty-runner.service`: + +```ini +[Unit] +Description=Flashduty Runner +Documentation=https://github.com/flashcatcloud/flashduty-runner +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=flashduty +Group=flashduty +EnvironmentFile=/etc/flashduty-runner/env +ExecStart=__INSTALL_DIR__/flashduty-runner run +Restart=always +RestartSec=5 +NoNewPrivileges=true +ProtectSystem=strict +ProtectHome=true +PrivateTmp=true +ReadWritePaths=/var/lib/flashduty-runner + +[Install] +WantedBy=multi-user.target +``` + +`__INSTALL_DIR__` is substituted at install time. + +Differences from the README's hand-written example, all intentional: +- `network-online.target` — the runner opens a WSS connection at start; `network.target` is satisfied too early. +- Hardening stanza — defense-in-depth. +- `ReadWritePaths=/var/lib/flashduty-runner` — required because `ProtectSystem=strict` otherwise blocks workspace writes. + +## Env file template + +`/etc/flashduty-runner/env` — mode 0600, owner `root:root` (systemd reads `EnvironmentFile=` as PID 1, so the service user never needs read access): + +```bash +# Managed by install.sh on first install. Edit freely; will not be overwritten on updates. 
+FLASHDUTY_RUNNER_TOKEN=wnt_xxx +FLASHDUTY_RUNNER_URL=wss://api.flashcat.cloud/safari/environment/ws +FLASHDUTY_RUNNER_WORKSPACE=/var/lib/flashduty-runner/workspace +FLASHDUTY_RUNNER_LOG_LEVEL=info +``` + +Written only on first install (when the file does not exist). Updates must never overwrite it — users who tune permissions, URL, or log level must be able to re-run the installer without losing settings. + +## Download URL convention + +``` +https://github.com/${REPO}/releases/download/${TAG}/flashduty-runner_${OS_TITLE}_${ARCH_GR}.tar.gz +https://github.com/${REPO}/releases/download/${TAG}/checksums.txt +``` + +Where: +- `OS_TITLE` ∈ `Linux`, `Darwin` (title-case from `uname -s`). +- `ARCH_GR` ∈ `x86_64` (amd64), `arm64` — per goreleaser's asset naming. + +### Version resolution + +1. If `VERSION` / `--version` set → use verbatim. +2. Else `curl -fsSLI -o /dev/null -w '%{url_effective}' https://github.com/${REPO}/releases/latest` — parse the redirect target. No GitHub API calls, no rate limits, no token needed. + +## Platform matrix + +| Condition | Behavior | +|---|---| +| Linux + systemd | Full flow: binary + user + env + unit + `enable --now` | +| Linux + no systemd (Alpine, minimal container) | Binary + user + env, skip unit, print run hint | +| Darwin | Binary only, print run hint. No user/env/unit. 
| +| Other OS | Exit 2 | +| Non-supported arch | Exit 2 | +| Not root | Exit 3 | +| `--no-service` on Linux | Binary only, skip user/env/unit | + +## Failure modes + +| Failure | Detection | Recovery | +|---|---|---| +| Missing `curl` | `command -v curl` | Exit 1 with install hint | +| Missing `tar` | `command -v tar` | Exit 1 with install hint | +| Missing `sha256sum` and `shasum` | combined check | Exit 1 with install hint | +| Download failure | `curl -fsSL` exit code | Exit 5, system untouched | +| Checksum mismatch | computed vs expected | Exit 4, tmp dir cleaned, no binary swap | +| Missing token, non-TTY stdin | `[ -t 0 ]` + `[ -r /dev/tty ]` | Exit 6 with hint | +| `systemctl enable --now` failure | non-zero exit | Warn; binary installed; point at `journalctl` and `.bak` | +| Update: new binary fails to start | `systemctl is-active` after 2s | Same warn; no auto-rollback | +| Crash mid-flow | `trap 'rm -rf "$TMPDIR"' EXIT` | Tmp cleaned; binary not swapped if verification hadn't completed | +| Concurrent install | `flock` on `/var/lock/flashduty-runner-install.lock` | Second invocation blocks or fails fast | + +## Security + +- `curl -fsSL` only. No fallback to http. +- Mandatory SHA256 verification against `checksums.txt` from the same release. +- Atomic binary replacement: tmp write → verify → `mv` on the same filesystem. +- Token never logged, never written outside `/etc/flashduty-runner/env`. +- Token prompt reads from `/dev/tty` with `stty -echo`, never from stdin (which is the pipe). +- No `eval`, no unquoted expansions of user-adjacent strings. `grep -F` for the exact asset filename in `checksums.txt`. +- `checksums.txt` itself is trusted because it comes from GitHub releases over HTTPS — same trust level as `rustup`, `bun`, `ollama`. **Trust-on-first-use caveat:** an attacker controlling the releases page (e.g. compromised maintainer token) could replace both binary and checksums atomically. 
Users needing stronger guarantees should pin `VERSION` and compare the binary sha against a known value. Attestation verification is out of scope (requires `gh`/`cosign` on host). +- Uninstall preserves `/etc/flashduty-runner/` by default — accidental uninstall doesn't destroy config. + +## Testing plan + +### Manual matrix (pre-merge) + +- Ubuntu 22.04 (systemd): install → update (from pinned older version) → uninstall → `--purge`. +- Alpine 3.19 (no systemd): install → binary-only path prints hint. +- macOS (darwin/arm64): install → binary-only path prints hint. +- `--no-service` on Ubuntu: binary only, no user/env/unit created. +- `VERSION=v0.0.5` on Ubuntu: verify pinning works. +- Re-run with current version: verify no-op path. +- Non-TTY piped install without `TOKEN`: verify exit 6 with clear message. +- Corrupted tarball (simulate by editing checksums in test fork): verify exit 4, no binary swap. + +### Automated CI + +Add `.github/workflows/install-sh.yml`: +- `shellcheck install.sh` — lint. +- `sh -n install.sh` — parse check under POSIX shell. +- Docker smoke test in `ubuntu:22.04`: + - `apt-get install -y curl ca-certificates tar` + - Run script pointed at the real release (`REPO=flashcatcloud/flashduty-runner`). + - Skip `enable --now` (via `--no-service`) so no real token / WSS endpoint is needed. + - Assert `flashduty-runner version` output matches `VERSION`. + - Run `--uninstall` and assert binary is gone. + +## Rollout + +1. Land `install.sh` + CI workflow on `main`. +2. Update `README.md` and `README_zh.md` "Quick Start → Binary Installation" sections to lead with the one-liner, keeping the manual block below for users who want it. +3. No goreleaser changes — the script lives in the repo, not in release assets (matches the decision in Q7). 
diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..5daa53b --- /dev/null +++ b/install.sh @@ -0,0 +1,499 @@ +#!/bin/sh +# install.sh — installer, updater, and uninstaller for flashduty-runner. +# +# Usage (install or update): +# curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash +# +# See https://github.com/flashcatcloud/flashduty-runner for documentation. + +set -eu + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +: "${REPO:=flashcatcloud/flashduty-runner}" +: "${INSTALL_DIR:=/usr/local/bin}" +: "${URL:=wss://api.flashcat.cloud/safari/environment/ws}" +: "${VERSION:=}" +: "${TOKEN:=}" + +BINARY_NAME="flashduty-runner" +CONFIG_DIR="/etc/flashduty-runner" +ENV_FILE="${CONFIG_DIR}/env" +STATE_DIR="/var/lib/flashduty-runner" +WORKSPACE_DIR="${STATE_DIR}/workspace" +SERVICE_USER="flashduty" +UNIT_PATH="/etc/systemd/system/flashduty-runner.service" +LOCK_FILE="/var/lock/flashduty-runner-install.lock" + +MODE="install" # install | uninstall +PURGE="false" +NO_SERVICE="false" + +TMPDIR_="" + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- + +if [ -t 2 ]; then + COLOR_RED="$(printf '\033[31m')" + COLOR_YLW="$(printf '\033[33m')" + COLOR_GRN="$(printf '\033[32m')" + COLOR_RST="$(printf '\033[0m')" +else + COLOR_RED="" + COLOR_YLW="" + COLOR_GRN="" + COLOR_RST="" +fi + +info() { printf '%s[info]%s %s\n' "$COLOR_GRN" "$COLOR_RST" "$*" >&2; } +warn() { printf '%s[warn]%s %s\n' "$COLOR_YLW" "$COLOR_RST" "$*" >&2; } +err() { printf '%s[err ]%s %s\n' "$COLOR_RED" "$COLOR_RST" "$*" >&2; } + +die() { + code="$1" + shift + err "$*" + exit "$code" +} + +# --------------------------------------------------------------------------- +# Help +# 
--------------------------------------------------------------------------- + +print_help() { + cat <<'EOF' +flashduty-runner installer + +USAGE: + install.sh [FLAGS] + +FLAGS: + --token <token> Auth token (written to env file on first install) + --version <tag> Pin a specific release tag (e.g. v0.0.5) + --no-service Install binary only, skip systemd unit / service user + --uninstall Stop service, remove binary + unit (keeps config) + --purge Uninstall and also remove config + state + user + --help Show this help + +ENVIRONMENT: + TOKEN Same as --token + VERSION Same as --version + URL Runtime WebSocket URL (default: wss://api.flashcat.cloud/…) + INSTALL_DIR Binary install directory (default: /usr/local/bin) + REPO GitHub owner/repo override (default: flashcatcloud/flashduty-runner) + +EXAMPLES: + curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash + curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo TOKEN=wnt_xxx bash + curl -fsSL https://raw.githubusercontent.com/flashcatcloud/flashduty-runner/main/install.sh | sudo bash -s -- --uninstall +EOF +} + +# --------------------------------------------------------------------------- +# Argument parsing — value-taking flags die with exit 1 when their value is missing, instead of tripping the trailing shift under set -e +# --------------------------------------------------------------------------- + +parse_args() { + while [ "$#" -gt 0 ]; do + case "$1" in + --help|-h) print_help; exit 0 ;; + --uninstall) MODE="uninstall" ;; + --purge) MODE="uninstall"; PURGE="true" ;; + --no-service) NO_SERVICE="true" ;; + --token) [ "$#" -ge 2 ] || die 1 "--token requires a value. Run with --help for usage."; shift; TOKEN="$1" ;; + --token=*) TOKEN="${1#--token=}" ;; + --version) [ "$#" -ge 2 ] || die 1 "--version requires a value. Run with --help for usage."; shift; VERSION="$1" ;; + --version=*) VERSION="${1#--version=}" ;; + *) die 1 "Unknown argument: $1. Run with --help for usage." 
;; + esac + shift + done +} + +# --------------------------------------------------------------------------- +# Preconditions +# --------------------------------------------------------------------------- + +require_root() { + if [ "$(id -u)" -ne 0 ]; then + die 3 "This script must run as root. Re-run via: curl -fsSL | sudo bash" + fi +} + +require_cmd() { + for c in "$@"; do + if ! command -v "$c" >/dev/null 2>&1; then + die 1 "Required command not found: $c. Please install it and retry." + fi + done +} + +# --------------------------------------------------------------------------- +# Platform detection +# --------------------------------------------------------------------------- + +OS="" +OS_TITLE="" +ARCH_GR="" +INIT="" + +detect_platform() { + uname_s="$(uname -s)" + uname_m="$(uname -m)" + + case "$uname_s" in + Linux) OS="linux"; OS_TITLE="Linux" ;; + Darwin) OS="darwin"; OS_TITLE="Darwin" ;; + *) die 2 "Unsupported OS: $uname_s. Supported: Linux, Darwin." ;; + esac + + case "$uname_m" in + x86_64|amd64) ARCH_GR="x86_64" ;; + arm64|aarch64) ARCH_GR="arm64" ;; + *) die 2 "Unsupported architecture: $uname_m. Supported: x86_64, arm64." 
;; + esac +} + +detect_init() { + if [ "$OS" = "linux" ] && [ -d /run/systemd/system ]; then + INIT="systemd" + else + INIT="none" + fi +} + +# --------------------------------------------------------------------------- +# Version resolution +# --------------------------------------------------------------------------- + +resolve_version() { + if [ -n "$VERSION" ]; then + case "$VERSION" in + v*) : ;; + *) VERSION="v$VERSION" ;; + esac + info "Using pinned version: $VERSION" + return + fi + + info "Resolving latest release from github.com/${REPO}" + effective="$(curl --proto '=https' --tlsv1.2 -fsSLI -o /dev/null -w '%{url_effective}' "https://github.com/${REPO}/releases/latest" || true)" + VERSION="${effective##*/}" + + if [ -z "$VERSION" ] || [ "$VERSION" = "latest" ]; then + die 5 "Could not resolve latest version from github.com/${REPO}/releases/latest" + fi + info "Latest version: $VERSION" +} + +get_installed_version() { + if [ -x "${INSTALL_DIR}/${BINARY_NAME}" ]; then + # Output format: "flashduty-runner 0.0.5\n Build Time: ...\n Git Commit: ..." + "${INSTALL_DIR}/${BINARY_NAME}" version 2>/dev/null | awk 'NR==1 {print $2; exit}' || true + fi +} + +# --------------------------------------------------------------------------- +# Download + verify +# --------------------------------------------------------------------------- + +sha256_of() { + file="$1" + if command -v sha256sum >/dev/null 2>&1; then + sha256sum "$file" | awk '{print $1}' + elif command -v shasum >/dev/null 2>&1; then + shasum -a 256 "$file" | awk '{print $1}' + else + die 1 "Need sha256sum or shasum on PATH (install coreutils on Linux)." + fi +} + +download_and_verify() { + asset="${BINARY_NAME}_${OS_TITLE}_${ARCH_GR}.tar.gz" + base="https://github.com/${REPO}/releases/download/${VERSION}" + + TMPDIR_="$(mktemp -d 2>/dev/null || mktemp -d -t frdl)" + trap 'rm -rf "$TMPDIR_"' EXIT; trap 'exit 130' INT; trap 'exit 143' TERM # signal handlers must exit (which fires the EXIT cleanup); a handler that only cleans up lets the script resume after Ctrl-C + + info "Downloading ${asset}" + if ! 
curl --proto '=https' --tlsv1.2 -fsSL "${base}/${asset}" -o "${TMPDIR_}/${asset}"; then + die 5 "Failed to download ${base}/${asset}" + fi + + info "Downloading checksums.txt" + if ! curl --proto '=https' --tlsv1.2 -fsSL "${base}/checksums.txt" -o "${TMPDIR_}/checksums.txt"; then + die 5 "Failed to download ${base}/checksums.txt" + fi + + # checksums.txt line format: " " (two-space separator from goreleaser). + # Anchor the filename so e.g. "foo_${asset}" cannot false-match. + expected="$(awk -v a="$asset" '$2 == a {print $1; exit}' "${TMPDIR_}/checksums.txt")" + if [ -z "$expected" ]; then + die 4 "Asset ${asset} not found in checksums.txt (wrong release or renamed asset)" + fi + actual="$(sha256_of "${TMPDIR_}/${asset}")" + if [ "$actual" != "$expected" ]; then + die 4 "Checksum mismatch for ${asset}: expected ${expected}, got ${actual}" + fi + info "Checksum OK" + + tar -xzf "${TMPDIR_}/${asset}" -C "${TMPDIR_}" + if [ ! -x "${TMPDIR_}/${BINARY_NAME}" ]; then + die 1 "Extracted tarball does not contain an executable ${BINARY_NAME}" + fi +} + +# --------------------------------------------------------------------------- +# Install steps +# --------------------------------------------------------------------------- + +# Stop the running service (if any) before we swap the binary, so a crash+restart +# in the narrow window between mv and enable_and_start can't launch the new binary +# ahead of schedule. Must run on the update path; cheap no-op on fresh installs. 
+stop_service_if_running() { + if [ "$INIT" = "systemd" ] && [ -f "$UNIT_PATH" ]; then + if systemctl is-active --quiet flashduty-runner 2>/dev/null; then + info "Stopping flashduty-runner before binary swap" + systemctl stop flashduty-runner || true + fi + fi +} + +install_binary() { + target="${INSTALL_DIR}/${BINARY_NAME}" + mkdir -p "$INSTALL_DIR" + if [ -e "$target" ]; then + mv -f "$target" "${target}.bak" + fi + mv "${TMPDIR_}/${BINARY_NAME}" "$target" + chmod 0755 "$target" + chown 0:0 "$target" 2>/dev/null || true + info "Installed ${target}" +} + +ensure_user() { + if id "$SERVICE_USER" >/dev/null 2>&1; then + return + fi + info "Creating system user: ${SERVICE_USER}" + if command -v useradd >/dev/null 2>&1; then + useradd --system --home-dir "$STATE_DIR" --shell /usr/sbin/nologin "$SERVICE_USER" + elif command -v adduser >/dev/null 2>&1; then + # Alpine / busybox fallback. + adduser -S -H -h "$STATE_DIR" -s /sbin/nologin "$SERVICE_USER" + else + die 1 "Neither useradd nor adduser available — cannot create service user" + fi +} + +ensure_workdir() { + # Refuse to touch a pre-existing symlink — prevents a pre-staged /var/lib/flashduty-runner + # → / symlink from turning our recursive chown into a full-FS chown. + if [ -L "$STATE_DIR" ]; then + die 1 "${STATE_DIR} is a symlink — refusing to chown through it." + fi + mkdir -p "$WORKSPACE_DIR" + chown -R "$SERVICE_USER":"$SERVICE_USER" "$STATE_DIR" 2>/dev/null || true + chmod 0750 "$STATE_DIR" "$WORKSPACE_DIR" +} + +ensure_token() { + # Already have a token in env file? Leave it alone on update. + if [ -f "$ENV_FILE" ] && grep -q '^FLASHDUTY_RUNNER_TOKEN=' "$ENV_FILE"; then + return + fi + if [ -n "$TOKEN" ]; then + return + fi + # Probe whether /dev/tty is actually openable. Mode bits alone aren't enough + # (it exists as a device in containers without a tty, but open() returns ENXIO). + # The subshell isolates the failing redirect from `set -e`. 
+ if ( : /dev/null; then + printf 'Enter Flashduty token: ' >/dev/tty + old_stty="" + # Restore terminal echo on Ctrl-C, otherwise the user is left with a silent shell. + if command -v stty >/dev/null 2>&1; then + old_stty="$(stty -g /dev/null || true)" + if [ -n "$old_stty" ]; then + trap 'stty "$old_stty" /dev/null || true; exit 130' INT TERM + fi + stty -echo /dev/null || true + fi + IFS= read -r TOKEN /dev/null || true + trap - INT TERM + fi + printf '\n' >/dev/tty + fi + if [ -z "$TOKEN" ]; then + die 6 "Token is required. Set TOKEN=wnt_xxx before sudo, pass --token, or run the script from a terminal." + fi +} + +write_env_file() { + mkdir -p "$CONFIG_DIR" + chmod 0755 "$CONFIG_DIR" + if [ -f "$ENV_FILE" ]; then + info "Env file exists, leaving it unchanged: ${ENV_FILE}" + return + fi + umask 077 + # Use printf (not heredoc) so shell metacharacters in $TOKEN / $URL stay literal. + { + printf '# Managed by install.sh on first install. Edit freely; updates will not overwrite this file.\n' + printf 'FLASHDUTY_RUNNER_TOKEN=%s\n' "$TOKEN" + printf 'FLASHDUTY_RUNNER_URL=%s\n' "$URL" + printf 'FLASHDUTY_RUNNER_WORKSPACE=%s\n' "$WORKSPACE_DIR" + printf 'FLASHDUTY_RUNNER_LOG_LEVEL=info\n' + } >"$ENV_FILE" + chown 0:0 "$ENV_FILE" 2>/dev/null || true + chmod 0600 "$ENV_FILE" + info "Wrote env file: ${ENV_FILE}" +} + +install_systemd_unit() { + new_unit="${TMPDIR_}/flashduty-runner.service" + cat >"$new_unit" </dev/null 2>&1 || true + systemctl start flashduty-runner + + # Give the process a moment to exec + either connect or crash, then check status. + sleep 2 + if systemctl is-active --quiet flashduty-runner; then + info "Service is active. Check status: systemctl status flashduty-runner" + else + warn "Service failed to start. Logs: journalctl -u flashduty-runner -n 50" + warn "Previous binary preserved at ${INSTALL_DIR}/${BINARY_NAME}.bak for rollback." 
+ fi +} + +# --------------------------------------------------------------------------- +# Uninstall +# --------------------------------------------------------------------------- + +uninstall() { + info "Uninstalling flashduty-runner (purge=${PURGE})" + if command -v systemctl >/dev/null 2>&1 && [ -f "$UNIT_PATH" ]; then + systemctl stop flashduty-runner 2>/dev/null || true + systemctl disable flashduty-runner 2>/dev/null || true + rm -f "$UNIT_PATH" + systemctl daemon-reload 2>/dev/null || true + fi + rm -f "${INSTALL_DIR}/${BINARY_NAME}" "${INSTALL_DIR}/${BINARY_NAME}.bak" + if [ "$PURGE" = "true" ]; then + rm -rf "$CONFIG_DIR" "$STATE_DIR" + if id "$SERVICE_USER" >/dev/null 2>&1; then + if command -v userdel >/dev/null 2>&1; then + userdel "$SERVICE_USER" 2>/dev/null || true + elif command -v deluser >/dev/null 2>&1; then + deluser "$SERVICE_USER" 2>/dev/null || true + fi + fi + info "Purge complete." + else + info "Uninstall complete. Config preserved at ${CONFIG_DIR}. Run with --purge to remove it." + fi +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +do_install() { + detect_platform + detect_init + require_cmd curl tar + + resolve_version + installed="$(get_installed_version || true)" + # Binary reports "0.0.5"; VERSION carries "v0.0.5" — strip the leading "v" for comparison. + if [ -n "$installed" ] && [ "$installed" = "${VERSION#v}" ]; then + info "Already at ${VERSION}, nothing to do." + return + fi + + download_and_verify + stop_service_if_running + install_binary + + if [ "$NO_SERVICE" = "true" ] || [ "$OS" = "darwin" ]; then + info "Binary-only install complete." 
+ info "Start manually: ${INSTALL_DIR}/${BINARY_NAME} run --token " + return + fi + + ensure_user + ensure_workdir + ensure_token + write_env_file + + if [ "$INIT" = "systemd" ]; then + install_systemd_unit + enable_and_start + info "Installed ${BINARY_NAME} ${VERSION}." + else + info "Systemd not detected. Binary installed at ${INSTALL_DIR}/${BINARY_NAME}." + info "Start manually with 'flashduty-runner run' or wire it into your init system." + fi +} + +main() { + parse_args "$@" + require_root + + # Serialize concurrent installs. flock is optional; skip if absent. + if command -v flock >/dev/null 2>&1; then + mkdir -p "$(dirname "$LOCK_FILE")" + exec 9>"$LOCK_FILE" + if ! flock -n 9; then + die 1 "Another install.sh is already running (lock: ${LOCK_FILE})" + fi + fi + + case "$MODE" in + install) do_install ;; + uninstall) uninstall ;; + *) die 1 "Unknown mode: $MODE" ;; + esac +} + +main "$@" diff --git a/ws/client.go b/ws/client.go index c3dbf49..021b2dd 100644 --- a/ws/client.go +++ b/ws/client.go @@ -30,8 +30,8 @@ const ( // Pong wait time pongWait = 60 * time.Second - // Maximum reconnect attempts - maxReconnectAttempts = 30 + // DefaultMaxReconnectAttempts is the default max reconnect attempts (0 = unlimited). + DefaultMaxReconnectAttempts = 30 // Initial reconnect delay (used for first few attempts) initialReconnectDelay = 1 * time.Second @@ -54,6 +54,7 @@ type Client struct { handler MessageHandler version string envInfo *protocol.EnvironmentInfo + maxAttempts int // 0 = unlimited mu sync.Mutex conn *websocket.Conn @@ -70,13 +71,15 @@ type Client struct { } // NewClient creates a new WebSocket client. -func NewClient(token, apiURL, workspaceRoot string, handler MessageHandler, version string) *Client { +// maxAttempts controls reconnection: 0 = unlimited, >0 = max attempts before giving up. 
+func NewClient(token, apiURL, workspaceRoot string, handler MessageHandler, version string, maxAttempts int) *Client { return &Client{ token: token, apiURL: apiURL, workspaceRoot: workspaceRoot, handler: handler, version: version, + maxAttempts: maxAttempts, envInfo: collectEnvironmentInfo(workspaceRoot), stopCh: make(chan struct{}), doneCh: make(chan struct{}), @@ -204,13 +207,13 @@ func (c *Client) RunWithReconnect(ctx context.Context) error { // Connect if err := c.Connect(ctx); err != nil { attempt++ - if attempt > maxReconnectAttempts { - return fmt.Errorf("max reconnect attempts exceeded: %w", err) + if c.maxAttempts > 0 && attempt > c.maxAttempts { + return fmt.Errorf("max reconnect attempts (%d) exceeded: %w", c.maxAttempts, err) } slog.Warn("connection failed, retrying", "attempt", attempt, - "max_attempts", maxReconnectAttempts, + "max_attempts", c.maxAttempts, "delay", delay, "error", err, )