From e5bb74916a062c0f2fbbf1cdf5476149718b2e39 Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Fri, 3 Jul 2026 20:40:22 +0200 Subject: [PATCH 1/2] feat(watcher): prune projects whose root stays missing (ENOENT-only, grace window) Distilled from #738: the watcher now prunes a watched project whose root directory has genuinely disappeared - deleting the cached DB (+wal/shm, validated name, cache-dir-only paths) and removing the watch entry - so vanished worktrees stop being watched forever (#286). Hardened beyond the original PR to remove a data-loss hazard (the cached DB can hold user-authored data such as the ADR, unrecoverable once deleted): - Only ENOENT/ENOTDIR stat failures count as missing. Any other failure (EACCES, EIO, transient mounts, macOS TCC revocation) resets the streak and logs watcher.root_stat_error with the errno; Windows (mingw/UCRT) maps not-found to ENOENT so the check holds there. - Pruning requires BOTH >=3 consecutive missing polls AND a sustained-absence grace window since the streak's first miss (default 600s, override via CBM_WATCHER_PRUNE_GRACE_S), tracked with monotonic cbm_now_ms. - Root reappearance resets streak + timestamp (watcher.root_restored). - The pruned entry is released via the deferred-free list poll_once drains (one freeing model shared with cbm_watcher_unwatch). Limitation: only currently-watched projects are pruned; stale DBs left by older sessions are out of scope. 
Co-authored-by: pcristin Signed-off-by: Martin Vogel --- src/watcher/watcher.c | 192 ++++++++++++++++++++++++++++++++++--- src/watcher/watcher.h | 8 ++ tests/test_watcher.c | 213 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 399 insertions(+), 14 deletions(-) diff --git a/src/watcher/watcher.c b/src/watcher/watcher.c index 65f935022..6073446f5 100644 --- a/src/watcher/watcher.c +++ b/src/watcher/watcher.c @@ -22,8 +22,10 @@ #include "foundation/compat.h" #include "foundation/compat_thread.h" #include "foundation/compat_fs.h" +#include "foundation/platform.h" #include "foundation/str_util.h" +#include #include #include #include @@ -39,6 +41,8 @@ typedef struct { char last_head[CBM_SZ_64]; /* git HEAD hash */ bool is_git; /* false → skip polling */ bool baseline_done; /* true after first poll */ + int missing_root_count; /* consecutive polls where root was missing (ENOENT/ENOTDIR) */ + uint64_t first_missing_ms; /* cbm_now_ms() of the streak's first miss (0 = no streak) */ int file_count; /* approximate, for interval calc */ int interval_ms; /* adaptive poll interval */ int64_t next_poll_ns; /* next poll time (monotonic ns) */ @@ -70,6 +74,14 @@ struct cbm_watcher { #define POLL_FILE_STEP 500 /* add 1s per this many files */ #define POLL_MAX_MS 60000 +/* Stale-root pruning (#286): a watched project whose root directory stays + * missing is pruned — its cached DB is deleted and the watch entry removed. + * Deletion is destructive (the DB can hold user-authored data such as the + * ADR), so it requires BOTH a streak of consecutive missing polls AND a + * sustained-absence grace window measured from the streak's first miss. 
*/ +#define MISSING_ROOT_DELETE_AFTER 3 +#define PRUNE_GRACE_DEFAULT_S 600 /* 10 min; override: CBM_WATCHER_PRUNE_GRACE_S */ + /* Sleep chunk for responsive shutdown (ms) */ #define SLEEP_CHUNK_MS 500 @@ -245,6 +257,107 @@ static void state_free(project_state_t *s) { free(s); } +/* Move a state onto the deferred-free list (caller holds projects_lock). + * The state may still be referenced by a poll_once snapshot; poll_once + * drains the list at the start of its next cycle. Falls back to an + * immediate free only if growing the list fails. */ +static void defer_state_free(cbm_watcher_t *w, project_state_t *s) { + if (w->pending_free_count >= w->pending_free_cap) { + int new_cap = w->pending_free_cap ? w->pending_free_cap * 2 : 8; + project_state_t **tmp = + realloc(w->pending_free, (size_t)new_cap * sizeof(project_state_t *)); + if (tmp) { + w->pending_free = tmp; + w->pending_free_cap = new_cap; + } + } + if (w->pending_free_count < w->pending_free_cap) { + w->pending_free[w->pending_free_count++] = s; + } else { + state_free(s); /* realloc failed — fall back to immediate free */ + } +} + +/* ── Stale-root pruning (#286) ──────────────────────────────────── */ + +bool cbm_watcher_root_missing_errno(int err) { + /* Only ENOENT/ENOTDIR mean the root itself is gone. Anything else + * (EACCES, EIO, ELOOP, a transient network mount, macOS TCC permission + * revocation) is uncertainty: the directory may still exist even though + * we cannot see it right now — never treat it as a deletion signal. + * Windows (mingw/UCRT) maps ERROR_FILE_NOT_FOUND / ERROR_PATH_NOT_FOUND + * to ENOENT, so the same check holds there (same convention as + * find_deleted_files in pipeline_incremental.c). 
*/ + return err == ENOENT || err == ENOTDIR; +} + +typedef enum { + ROOT_PRESENT = 0, /* stat succeeded and the root is a directory */ + ROOT_MISSING, /* genuinely gone: ENOENT/ENOTDIR (or replaced by a non-directory) */ + ROOT_UNCERTAIN, /* any other stat failure — must NOT count toward pruning */ +} root_status_t; + +static root_status_t root_status(const char *root_path, int *out_errno) { + *out_errno = 0; + if (!root_path) { + return ROOT_UNCERTAIN; + } + struct stat st; + if (stat(root_path, &st) == 0) { + /* Exists but is no longer a directory → the root directory is gone. */ + return S_ISDIR(st.st_mode) ? ROOT_PRESENT : ROOT_MISSING; + } + *out_errno = errno; + return cbm_watcher_root_missing_errno(errno) ? ROOT_MISSING : ROOT_UNCERTAIN; +} + +/* Sustained-absence window (seconds) before a missing root may be pruned. + * Generous default: 10 minutes. Override with CBM_WATCHER_PRUNE_GRACE_S + * (>= 0; 0 prunes as soon as the missing-poll streak is reached). Read on + * each call so tests/operators can adjust via setenv without a restart — + * same convention as cbm_max_file_bytes in limits.c. */ +static long prune_grace_s(void) { + const char *raw = getenv("CBM_WATCHER_PRUNE_GRACE_S"); + if (raw && raw[0]) { + errno = 0; + char *end = NULL; + long v = strtol(raw, &end, 10); + if (errno == 0 && end != raw && *end == '\0' && v >= 0) { + return v; + } + /* Unparseable / negative → fall through to the safe default. */ + } + return PRUNE_GRACE_DEFAULT_S; +} + +/* Format int to string for logging (poll thread only, one use per call). 
*/ +static const char *itoa_buf(int v) { + static CBM_TLS char buf[CBM_SZ_32]; + snprintf(buf, sizeof(buf), "%d", v); + return buf; +} + +static void delete_cached_project_db(const char *project_name) { + if (!cbm_validate_project_name(project_name)) { + return; + } + + const char *cache_dir = cbm_resolve_cache_dir(); + if (!cache_dir) { + return; + } + + char path[CBM_SZ_1K]; + char wal[CBM_SZ_1K]; + char shm[CBM_SZ_1K]; + snprintf(path, sizeof(path), "%s/%s.db", cache_dir, project_name); + snprintf(wal, sizeof(wal), "%s-wal", path); + snprintf(shm, sizeof(shm), "%s-shm", path); + (void)cbm_unlink(path); + (void)cbm_unlink(wal); + (void)cbm_unlink(shm); +} + /* Hash table foreach callback to free state entries */ static void free_state_entry(const char *key, void *val, void *ud) { (void)key; @@ -336,20 +449,7 @@ void cbm_watcher_unwatch(cbm_watcher_t *w, const char *project_name) { /* Defer free: the state may still be referenced by a poll_once * snapshot taken before we acquired the lock. poll_once will * drain this list at the start of its next cycle. */ - if (w->pending_free_count >= w->pending_free_cap) { - int new_cap = w->pending_free_cap ? 
w->pending_free_cap * 2 : 8; - project_state_t **tmp = - realloc(w->pending_free, (size_t)new_cap * sizeof(project_state_t *)); - if (tmp) { - w->pending_free = tmp; - w->pending_free_cap = new_cap; - } - } - if (w->pending_free_count < w->pending_free_cap) { - w->pending_free[w->pending_free_count++] = s; - } else { - state_free(s); /* realloc failed — fall back to immediate free */ - } + defer_state_free(w, s); removed = true; } cbm_mutex_unlock(&w->projects_lock); @@ -437,6 +537,32 @@ typedef struct { int reindexed; } poll_ctx_t; +static void prune_missing_project(cbm_watcher_t *w, project_state_t *s) { + if (!w || !s || !s->project_name) { + return; + } + + char project_name[CBM_SZ_1K]; + snprintf(project_name, sizeof(project_name), "%s", s->project_name); + + bool removed = false; + cbm_mutex_lock(&w->projects_lock); + project_state_t *current = cbm_ht_get(w->projects, project_name); + if (current == s) { + delete_cached_project_db(project_name); + cbm_ht_delete(w->projects, project_name); + /* Deferred free (same discipline as cbm_watcher_unwatch): this + * state is referenced by the poll_once snapshot iterating us. */ + defer_state_free(w, s); + removed = true; + } + cbm_mutex_unlock(&w->projects_lock); + + if (removed) { + cbm_log_info("watcher.root_pruned", "project", project_name); + } +} + static void poll_project(const char *key, void *val, void *ud) { (void)key; poll_ctx_t *ctx = ud; @@ -445,6 +571,44 @@ static void poll_project(const char *key, void *val, void *ud) { return; } + /* Stale-root pruning (#286): classify the root BEFORE the baseline / + * is_git / interval gates so vanished roots are noticed even for + * non-git projects and regardless of adaptive backoff. */ + int stat_errno = 0; + root_status_t rs = root_status(s->root_path, &stat_errno); + if (rs == ROOT_UNCERTAIN) { + /* EACCES / EIO / network blip / TCC revocation — the root may still + * exist. 
Never count toward pruning; restart the streak so only an + * uninterrupted run of genuine ENOENT/ENOTDIR observations can + * delete user data. */ + if (s->missing_root_count > 0) { + s->missing_root_count = 0; + s->first_missing_ms = 0; + } + cbm_log_warn("watcher.root_stat_error", "project", s->project_name, "path", s->root_path, + "errno", itoa_buf(stat_errno)); + return; + } + if (rs == ROOT_MISSING) { + uint64_t now_ms = cbm_now_ms(); + if (s->missing_root_count == 0) { + s->first_missing_ms = now_ms; + } + s->missing_root_count++; + cbm_log_warn("watcher.root_missing", "project", s->project_name, "path", s->root_path, + "polls", itoa_buf(s->missing_root_count)); + if (s->missing_root_count >= MISSING_ROOT_DELETE_AFTER && + now_ms - s->first_missing_ms >= (uint64_t)prune_grace_s() * CBM_MSEC_PER_SEC) { + prune_missing_project(ctx->w, s); + } + return; + } + if (s->missing_root_count > 0) { + cbm_log_info("watcher.root_restored", "project", s->project_name, "path", s->root_path); + s->missing_root_count = 0; + s->first_missing_ms = 0; + } + /* Initialize baseline on first poll */ if (!s->baseline_done) { init_baseline(s); diff --git a/src/watcher/watcher.h b/src/watcher/watcher.h index 6647fc38f..8b57466ac 100644 --- a/src/watcher/watcher.h +++ b/src/watcher/watcher.h @@ -70,4 +70,12 @@ int cbm_watcher_watch_count(cbm_watcher_t *w); /* Return the adaptive poll interval (ms) for a given file count. */ int cbm_watcher_poll_interval_ms(int file_count); +/* Classify a stat() errno observed on a watched project root: returns true + * only for values that mean the root itself is gone (ENOENT, ENOTDIR) and + * may count toward stale-root pruning (#286). Any other failure (EACCES, + * EIO, transient mounts, macOS TCC revocation) must NOT count — the cached + * DB holds user-authored data and is unrecoverable once pruned. Exposed + * for direct unit testing with injected errno values. 
*/ +bool cbm_watcher_root_missing_errno(int err); + #endif /* CBM_WATCHER_H */ diff --git a/tests/test_watcher.c b/tests/test_watcher.c index c313f8b74..ffa817495 100644 --- a/tests/test_watcher.c +++ b/tests/test_watcher.c @@ -5,10 +5,12 @@ * poll_once behavior. */ #include "../src/foundation/compat.h" +#include "../src/foundation/platform.h" #include "test_framework.h" #include "test_helpers.h" #include #include +#include #include #include #include @@ -190,6 +192,213 @@ TEST(watcher_poll_nonexistent_path) { PASS(); } +/* ══════════════════════════════════════════════════════════════════ + * STALE-ROOT PRUNING (#286) + * ══════════════════════════════════════════════════════════════════ */ + +/* Shared fixture for the stale-root pruning tests: a temp project root, a + * temp CBM_CACHE_DIR seeded with db/-wal/-shm files for "stale-project", + * and saved copies of the env vars the tests override. */ +typedef struct { + char rootdir[256]; + char cachedir[256]; + char db_path[512]; + char wal_path[512]; + char shm_path[512]; + char saved_cache_dir[1024]; + bool had_cache_dir; + char saved_grace[64]; + bool had_grace; +} prune_fixture_t; + +/* Returns false (with partial state cleaned up) if setup failed. 
*/ +static bool prune_fixture_setup(prune_fixture_t *f, const char *grace_s) { + snprintf(f->rootdir, sizeof(f->rootdir), "/tmp/cbm_watcher_stale_root_XXXXXX"); + if (!cbm_mkdtemp(f->rootdir)) { + return false; + } + snprintf(f->cachedir, sizeof(f->cachedir), "/tmp/cbm_watcher_stale_cache_XXXXXX"); + if (!cbm_mkdtemp(f->cachedir)) { + th_rmtree(f->rootdir); + return false; + } + + f->had_cache_dir = cbm_safe_getenv("CBM_CACHE_DIR", f->saved_cache_dir, + sizeof(f->saved_cache_dir), NULL) != NULL; + f->had_grace = cbm_safe_getenv("CBM_WATCHER_PRUNE_GRACE_S", f->saved_grace, + sizeof(f->saved_grace), NULL) != NULL; + cbm_setenv("CBM_CACHE_DIR", f->cachedir, 1); + cbm_setenv("CBM_WATCHER_PRUNE_GRACE_S", grace_s, 1); + + snprintf(f->db_path, sizeof(f->db_path), "%s/stale-project.db", f->cachedir); + snprintf(f->wal_path, sizeof(f->wal_path), "%s/stale-project.db-wal", f->cachedir); + snprintf(f->shm_path, sizeof(f->shm_path), "%s/stale-project.db-shm", f->cachedir); + th_write_file(f->db_path, "db\n"); + th_write_file(f->wal_path, "wal\n"); + th_write_file(f->shm_path, "shm\n"); + return true; +} + +static void prune_fixture_teardown(prune_fixture_t *f) { + if (f->had_cache_dir) { + cbm_setenv("CBM_CACHE_DIR", f->saved_cache_dir, 1); + } else { + cbm_unsetenv("CBM_CACHE_DIR"); + } + if (f->had_grace) { + cbm_setenv("CBM_WATCHER_PRUNE_GRACE_S", f->saved_grace, 1); + } else { + cbm_unsetenv("CBM_WATCHER_PRUNE_GRACE_S"); + } + th_rmtree(f->rootdir); + th_rmtree(f->cachedir); +} + +TEST(watcher_prunes_sustained_missing_root) { + /* Positive prune path. Grace window 0s isolates the streak-threshold + * logic; the time gate is guarded by watcher_grace_window_blocks_prune. 
*/ + prune_fixture_t f; + if (!prune_fixture_setup(&f, "0")) { + FAIL("prune fixture setup failed"); + } + + cbm_store_t *store = cbm_store_open_memory(); + cbm_watcher_t *w = cbm_watcher_new(store, index_callback, NULL); + cbm_watcher_watch(w, "stale-project", f.rootdir); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + + /* Existing root: first poll initializes baseline only. */ + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + + th_rmtree(f.rootdir); + + /* Misses #1 and #2: below the streak threshold — keep project + DB. */ + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + ASSERT_EQ(access(f.db_path, F_OK), 0); + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + ASSERT_EQ(access(f.db_path, F_OK), 0); + + /* Miss #3 with the grace window already satisfied: prune the watch + * entry and the cached DB files. */ + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 0); + ASSERT_NEQ(access(f.db_path, F_OK), 0); + ASSERT_NEQ(access(f.wal_path, F_OK), 0); + ASSERT_NEQ(access(f.shm_path, F_OK), 0); + + cbm_watcher_free(w); + cbm_store_close(store); + prune_fixture_teardown(&f); + PASS(); +} + +TEST(watcher_grace_window_blocks_prune) { + /* 3+ missing polls but elapsed < grace → NOT pruned. Uses an explicit + * 600s window so a fast poll burst can never satisfy the time gate. */ + prune_fixture_t f; + if (!prune_fixture_setup(&f, "600")) { + FAIL("prune fixture setup failed"); + } + + cbm_store_t *store = cbm_store_open_memory(); + cbm_watcher_t *w = cbm_watcher_new(store, index_callback, NULL); + cbm_watcher_watch(w, "stale-project", f.rootdir); + + cbm_watcher_poll_once(w); /* baseline */ + th_rmtree(f.rootdir); + + /* 4 consecutive misses in quick succession: streak threshold reached, + * but the sustained-absence window (600s) has not elapsed. 
*/ + for (int i = 0; i < 4; i++) { + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + } + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + ASSERT_EQ(access(f.db_path, F_OK), 0); + ASSERT_EQ(access(f.wal_path, F_OK), 0); + ASSERT_EQ(access(f.shm_path, F_OK), 0); + + cbm_watcher_free(w); + cbm_store_close(store); + prune_fixture_teardown(&f); + PASS(); +} + +TEST(watcher_root_missing_errno_classification) { + /* Only ENOENT/ENOTDIR may count toward pruning; EACCES-style failures + * (permissions, I/O errors, transient mounts, macOS TCC revocation) + * must never increment the missing streak. The classifier is unit- + * tested with injected errno values because a real EACCES cannot be + * simulated portably (tests may run as root on CI; Windows ACLs). */ + ASSERT_TRUE(cbm_watcher_root_missing_errno(ENOENT)); + ASSERT_TRUE(cbm_watcher_root_missing_errno(ENOTDIR)); + ASSERT_FALSE(cbm_watcher_root_missing_errno(0)); + ASSERT_FALSE(cbm_watcher_root_missing_errno(EACCES)); + ASSERT_FALSE(cbm_watcher_root_missing_errno(EIO)); + ASSERT_FALSE(cbm_watcher_root_missing_errno(EINVAL)); + ASSERT_FALSE(cbm_watcher_root_missing_errno(ENAMETOOLONG)); + PASS(); +} + +TEST(watcher_root_restore_resets_prune_streak) { + /* A reappearing root must reset the missing streak AND its first-miss + * timestamp — pruning requires a fresh uninterrupted streak. */ + prune_fixture_t f; + if (!prune_fixture_setup(&f, "0")) { + FAIL("prune fixture setup failed"); + } + + cbm_store_t *store = cbm_store_open_memory(); + cbm_watcher_t *w = cbm_watcher_new(store, index_callback, NULL); + cbm_watcher_watch(w, "stale-project", f.rootdir); + + cbm_watcher_poll_once(w); /* baseline */ + th_rmtree(f.rootdir); + + /* Misses #1 and #2 — one short of the threshold. */ + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + + /* Root comes back (e.g. 
remount / re-clone): streak resets. */ + if (!cbm_mkdir_p(f.rootdir, 0755)) { + FAIL("mkdir_p restore failed"); + } + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + + th_rmtree(f.rootdir); + + /* Misses #1 and #2 of the NEW streak: must not prune even though the + * total number of misses is now four. */ + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 1); + ASSERT_EQ(access(f.db_path, F_OK), 0); + + /* Miss #3 of the new streak → prune. */ + cbm_watcher_touch(w, "stale-project"); + cbm_watcher_poll_once(w); + ASSERT_EQ(cbm_watcher_watch_count(w), 0); + ASSERT_NEQ(access(f.db_path, F_OK), 0); + + cbm_watcher_free(w); + cbm_store_close(store); + prune_fixture_teardown(&f); + PASS(); +} + TEST(watcher_poll_this_repo) { /* Use this project's own repo as a real git repo test */ cbm_store_t *store = cbm_store_open_memory(); @@ -1693,6 +1902,10 @@ SUITE(watcher) { /* Polling */ RUN_TEST(watcher_poll_no_projects); RUN_TEST(watcher_poll_nonexistent_path); + RUN_TEST(watcher_prunes_sustained_missing_root); + RUN_TEST(watcher_grace_window_blocks_prune); + RUN_TEST(watcher_root_missing_errno_classification); + RUN_TEST(watcher_root_restore_resets_prune_streak); RUN_TEST(watcher_poll_this_repo); RUN_TEST(watcher_stop_flag); From 6ed3dc0749d0fdd86a244e52fac293a2fecfa836 Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Sat, 4 Jul 2026 00:19:07 +0200 Subject: [PATCH 2/2] ci: re-trigger CodeQL gate (gate lookup fixed on main via #820) Signed-off-by: Martin Vogel