From 7b93210ca29d4b21b030a33636d3dc0755769c39 Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Fri, 3 Jul 2026 20:05:10 +0200 Subject: [PATCH 1/3] fix(cli): overflow-safe ZIP entry bounds check in self-update extraction The truncation check `header_end + (int)comp_size > data_len` was bypassable for comp_size >= 2^31: the int cast turns the size negative, the sum drops below data_len, and the entry is accepted -- authorizing inflate to read up to ~4GB past the downloaded archive buffer during self-update ZIP extraction. Replace it with `header_end > data_len || comp_size > (uint32_t)(data_len - header_end)`, which is overflow-safe and strictly tighter. Regression guard cli_extract_binary_from_zip_rejects_truncated_deflate_size_over_int_max builds a 52-byte archive whose entry claims comp_size=0xFFFF0000 with a self-terminating DEFLATE stream: the old check admits it and extraction returns non-NULL (verified red pre-fix); the new check rejects it. Secondary cppcheck cleanups from the same review: - cli.c: extract zip_read_u16le/zip_read_u32le little-endian helpers (behavior-identical) and widen zip_extract_entry sizes to size_t with an explicit UINT_MAX guard before the zlib uInt narrowing. - cypher.c: drop unreachable IS NULL / IS NOT NULL null guards in eval_condition (resolve_condition_value result is checked earlier). Distilled from PR #784. 
Co-authored-by: SS-42 Signed-off-by: Martin Vogel --- src/cli/cli.c | 48 ++++++++++++++++++++++----------------------- src/cypher/cypher.c | 4 ++-- tests/test_cli.c | 43 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/src/cli/cli.c b/src/cli/cli.c index 5930d317d..a3492d065 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -80,6 +80,7 @@ enum { #endif #include <errno.h> // EEXIST #include <fcntl.h> // open, O_WRONLY, O_CREAT, O_TRUNC +#include <limits.h> // UINT_MAX #include <stdint.h> // uintptr_t #include #include @@ -2485,12 +2486,22 @@ enum { ZIP_STORED = 0, ZIP_DEFLATE = 8 }; -static const uint32_t ZIP_MAX_UNCOMP = 500U * 1024U * 1024U; +static const size_t ZIP_MAX_UNCOMP = 500U * 1024U * 1024U; + +static uint16_t zip_read_u16le(const unsigned char *p) { + return (uint16_t)((uint16_t)p[0] | ((uint16_t)p[1] << BYTE_SHIFT)); +} + +static uint32_t zip_read_u32le(const unsigned char *p) { + return ((uint32_t)p[0]) | ((uint32_t)p[1] << BYTE_SHIFT) | + ((uint32_t)p[2] << (BYTE_SHIFT * CLI_PAIR_LEN)) | + ((uint32_t)p[3] << (BYTE_SHIFT * CLI_JSON_INDENT)); +} /* Decompress a single zip entry (stored or deflated). Returns malloc'd buffer * or NULL on failure. *out_len receives the decompressed size. 
*/ static unsigned char *zip_extract_entry(const unsigned char *file_data, uint16_t method, - uint32_t comp_size, uint32_t uncomp_size, int *out_len) { + size_t comp_size, size_t uncomp_size, int *out_len) { if (method == ZIP_STORED) { if (comp_size > ZIP_MAX_UNCOMP) { return NULL; @@ -2507,15 +2518,18 @@ static unsigned char *zip_extract_entry(const unsigned char *file_data, uint16_t if (uncomp_size > ZIP_MAX_UNCOMP) { return NULL; } + if (comp_size > UINT_MAX || uncomp_size > UINT_MAX) { + return NULL; + } unsigned char *out = malloc(uncomp_size); if (!out) { return NULL; } z_stream strm = {0}; strm.next_in = (unsigned char *)file_data; - strm.avail_in = comp_size; + strm.avail_in = (uInt)comp_size; strm.next_out = out; - strm.avail_out = uncomp_size; + strm.avail_out = (uInt)uncomp_size; if (inflateInit2(&strm, -MAX_WBITS) != Z_OK) { free(out); return NULL; @@ -2545,28 +2559,14 @@ unsigned char *cbm_extract_binary_from_zip(const unsigned char *data, int data_l break; } - uint16_t method = (uint16_t)(data[pos + ZIP_OFF_METHOD] | - (data[pos + ZIP_OFF_METHOD + CLI_SKIP_ONE] << BYTE_SHIFT)); - uint32_t comp_size = - (uint32_t)(data[pos + ZIP_OFF_COMP] | - (data[pos + ZIP_OFF_COMP + CLI_SKIP_ONE] << BYTE_SHIFT) | - (data[pos + ZIP_OFF_COMP + CLI_PAIR_LEN] << (BYTE_SHIFT * CLI_PAIR_LEN)) | - (data[pos + ZIP_OFF_COMP + CLI_JSON_INDENT] - << (BYTE_SHIFT * CLI_JSON_INDENT))); - uint32_t uncomp_size = - (uint32_t)(data[pos + ZIP_OFF_UNCOMP] | - (data[pos + ZIP_OFF_UNCOMP + CLI_SKIP_ONE] << BYTE_SHIFT) | - (data[pos + ZIP_OFF_UNCOMP + CLI_PAIR_LEN] << (BYTE_SHIFT * CLI_PAIR_LEN)) | - (data[pos + ZIP_OFF_UNCOMP + CLI_JSON_INDENT] - << (BYTE_SHIFT * CLI_JSON_INDENT))); - uint16_t name_len = (uint16_t)(data[pos + ZIP_OFF_NAMELEN] | - (data[pos + ZIP_OFF_NAMELEN + CLI_SKIP_ONE] << BYTE_SHIFT)); - uint16_t extra_len = - (uint16_t)(data[pos + ZIP_OFF_EXTRALEN] | - (data[pos + ZIP_OFF_EXTRALEN + CLI_SKIP_ONE] << BYTE_SHIFT)); + uint16_t method = zip_read_u16le(data + pos + 
ZIP_OFF_METHOD); + uint32_t comp_size = zip_read_u32le(data + pos + ZIP_OFF_COMP); + uint32_t uncomp_size = zip_read_u32le(data + pos + ZIP_OFF_UNCOMP); + uint16_t name_len = zip_read_u16le(data + pos + ZIP_OFF_NAMELEN); + uint16_t extra_len = zip_read_u16le(data + pos + ZIP_OFF_EXTRALEN); int header_end = pos + ZIP_HDR_SZ + name_len + extra_len; - if (header_end + (int)comp_size > data_len) { + if (header_end > data_len || comp_size > (uint32_t)(data_len - header_end)) { break; } diff --git a/src/cypher/cypher.c b/src/cypher/cypher.c index 77bc7105a..b211ae815 100644 --- a/src/cypher/cypher.c +++ b/src/cypher/cypher.c @@ -2441,11 +2441,11 @@ static bool eval_condition(const cbm_condition_t *c, binding_t *b) { /* IS NULL / IS NOT NULL */ if (strcmp(c->op, "IS NULL") == 0) { - result = (!actual || actual[0] == '\0'); + result = (actual[0] == '\0'); return c->negated ? !result : result; } if (strcmp(c->op, "IS NOT NULL") == 0) { - result = (actual && actual[0] != '\0'); + result = (actual[0] != '\0'); return c->negated ? 
!result : result; } diff --git a/tests/test_cli.c b/tests/test_cli.c index af300ab51..de21895b9 100644 --- a/tests/test_cli.c +++ b/tests/test_cli.c @@ -14,6 +14,7 @@ #include "test_helpers.h" #include #include +#include #include #include #include @@ -1361,6 +1362,47 @@ TEST(cli_extract_binary_from_zip_invalid) { PASS(); } +TEST(cli_extract_binary_from_zip_rejects_truncated_deflate_size_over_int_max) { + const char *filename = "codebase-memory-mcp"; + const unsigned char deflated[] = {0xAB, 0x00, 0x00}; /* raw DEFLATE for "x" */ + size_t name_len = strlen(filename); + size_t zip_len = 30 + name_len + sizeof(deflated); + unsigned char *zip = calloc(1, zip_len); + ASSERT_NOT_NULL(zip); + + uint32_t comp_size = 0xFFFF0000U; + uint32_t uncomp_size = 1U; + zip[0] = 0x50; + zip[1] = 0x4B; + zip[2] = 0x03; + zip[3] = 0x04; + zip[8] = 8; + zip[9] = 0; + zip[18] = (unsigned char)(comp_size & 0xFF); + zip[19] = (unsigned char)((comp_size >> 8) & 0xFF); + zip[20] = (unsigned char)((comp_size >> 16) & 0xFF); + zip[21] = (unsigned char)((comp_size >> 24) & 0xFF); + zip[22] = (unsigned char)(uncomp_size & 0xFF); + zip[23] = (unsigned char)((uncomp_size >> 8) & 0xFF); + zip[24] = (unsigned char)((uncomp_size >> 16) & 0xFF); + zip[25] = (unsigned char)((uncomp_size >> 24) & 0xFF); + zip[26] = (unsigned char)(name_len & 0xFF); + zip[27] = (unsigned char)((name_len >> 8) & 0xFF); + memcpy(zip + 30, filename, name_len); + memcpy(zip + 30 + name_len, deflated, sizeof(deflated)); + + int out_len = 0; + unsigned char *extracted = cbm_extract_binary_from_zip(zip, (int)zip_len, &out_len); + if (extracted) { + free(extracted); + free(zip); + FAIL("accepted a truncated deflated zip entry with a wrapped compressed size"); + } + ASSERT_EQ(out_len, 0); + free(zip); + PASS(); +} + /* ═══════════════════════════════════════════════════════════════════ * Skill dry-run tests * ═══════════════════════════════════════════════════════════════════ */ @@ -2981,6 +3023,7 @@ SUITE(cli) { 
RUN_TEST(cli_extract_binary_from_zip_not_found); RUN_TEST(cli_extract_binary_from_zip_path_traversal); RUN_TEST(cli_extract_binary_from_zip_invalid); + RUN_TEST(cli_extract_binary_from_zip_rejects_truncated_deflate_size_over_int_max); /* Dry-run lifecycle (2 tests) */ RUN_TEST(cli_install_dry_run); From 50392a418879725f63dc06d01901ac67fd31f88f Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Fri, 3 Jul 2026 20:11:49 +0200 Subject: [PATCH 2/3] fix(scripts): use /usr/bin/env bash shebangs (NixOS has no /bin/bash) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On NixOS (and other non-FHS systems) /bin/bash does not exist, so scripts with a hard-coded #!/bin/bash shebang fail to run. Switch the remaining holdouts to /usr/bin/env bash: eleven scripts/*.sh, test-infrastructure/run.sh, and the three Claude Code hook scripts emitted by src/cli/cli.c (gate, session reminder, subagent reminder). Distilled from PR #674, with parser-test coverage preserved: the infra_parse_shell* fixtures in tests/test_pipeline.c intentionally keep #!/bin/bash so absolute-path shebang extraction stays covered, and tests/repro fixtures are untouched. Also replace the GitHub-PAT-shaped fixture string flagged in the #674 thread with an obviously fake placeholder (ghp_FAKE...) that still matches the ghp_ + 36-alnum secret detector. 
Co-authored-by: Sandro Jäckel Signed-off-by: Martin Vogel --- scripts/benchmark-search-graph.sh | 2 +- scripts/build.sh | 2 +- scripts/check-nolint-whitelist.sh | 2 +- scripts/clean.sh | 2 +- scripts/embed-frontend.sh | 2 +- scripts/env.sh | 2 +- scripts/lint.sh | 2 +- scripts/repro.sh | 2 +- scripts/soak-test.sh | 2 +- scripts/test.sh | 2 +- scripts/vendor-grammar.sh | 2 +- src/cli/cli.c | 6 +++--- test-infrastructure/run.sh | 2 +- tests/test_pipeline.c | 2 +- 14 files changed, 16 insertions(+), 16 deletions(-) diff --git a/scripts/benchmark-search-graph.sh b/scripts/benchmark-search-graph.sh index 3da58492c..cc94147ec 100755 --- a/scripts/benchmark-search-graph.sh +++ b/scripts/benchmark-search-graph.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # benchmark-search-graph.sh — Time search_graph name_pattern= queries against a # codebase-memory-mcp binary to measure the regex / LIKE pre-filter performance. # diff --git a/scripts/build.sh b/scripts/build.sh index bbb9bda74..8d9dad383 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # build.sh — Clean build of production binary (standard or with UI). # # Usage: diff --git a/scripts/check-nolint-whitelist.sh b/scripts/check-nolint-whitelist.sh index f2432bfe2..757a270fc 100755 --- a/scripts/check-nolint-whitelist.sh +++ b/scripts/check-nolint-whitelist.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Verify NOLINT(misc-no-recursion) only appears on whitelisted functions. WHITELIST="src/foundation/recursion_whitelist.h" if [ ! -f "$WHITELIST" ]; then diff --git a/scripts/clean.sh b/scripts/clean.sh index 662cf3aa8..ea9e15976 100755 --- a/scripts/clean.sh +++ b/scripts/clean.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # clean.sh — Remove ALL build artifacts, caches, and generated files. 
# # Usage: scripts/clean.sh diff --git a/scripts/embed-frontend.sh b/scripts/embed-frontend.sh index 908c39790..0b7b87309 100755 --- a/scripts/embed-frontend.sh +++ b/scripts/embed-frontend.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # embed-frontend.sh — Convert built frontend assets into linkable object files. # # Usage: scripts/embed-frontend.sh diff --git a/scripts/env.sh b/scripts/env.sh index 8cb5f9999..f7922dd96 100755 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # env.sh — Shared environment detection for all build scripts. # # Sourced by test.sh, build.sh, lint.sh. Not meant to run standalone. diff --git a/scripts/lint.sh b/scripts/lint.sh index 60e023b47..bb2b0ec1e 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # lint.sh — Run all linters (clang-tidy + cppcheck + clang-format). # # Usage: diff --git a/scripts/repro.sh b/scripts/repro.sh index d585a9ec7..78162e779 100755 --- a/scripts/repro.sh +++ b/scripts/repro.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # repro.sh — Build + run the cumulative BUG-REPRODUCTION suite (test-repro). # # Unlike test.sh (the gating suite, must be GREEN), this suite is RED by design: diff --git a/scripts/soak-test.sh b/scripts/soak-test.sh index 211cec5da..ab0dff778 100755 --- a/scripts/soak-test.sh +++ b/scripts/soak-test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # soak-test.sh — Endurance test for codebase-memory-mcp. # # Runs compressed workload cycles: queries, file mutations, reindexes, idle periods. diff --git a/scripts/test.sh b/scripts/test.sh index cd98491e2..1f5c6d9ed 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # test.sh — Clean build + run all C tests with ASan + UBSan. 
# # Usage: diff --git a/scripts/vendor-grammar.sh b/scripts/vendor-grammar.sh index 83009f2ff..696f486e4 100755 --- a/scripts/vendor-grammar.sh +++ b/scripts/vendor-grammar.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # vendor-grammar.sh: Vendor a single tree-sitter grammar into internal/cbm/vendored/grammars// # Usage: ./scripts/vendor-grammar.sh [subdir] # repo_url: GitHub repository URL (e.g., https://github.com/tree-sitter/tree-sitter-json) diff --git a/src/cli/cli.c b/src/cli/cli.c index a3492d065..a9850c77e 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -2055,7 +2055,7 @@ void cbm_install_hook_gate_script(const char *home, const char *binary_path) { return; } (void)fprintf(f, - "#!/bin/bash\n" + "#!/usr/bin/env bash\n" "# codebase-memory-mcp search augmenter (Claude Code PreToolUse).\n" "# NOTE: the legacy filename is kept for zero-migration upgrades.\n" "# Despite the name this NEVER blocks a tool call - it only adds\n" @@ -2099,7 +2099,7 @@ static void cbm_install_session_reminder_script(const char *home) { return; } (void)fprintf( - f, "#!/bin/bash\n" + f, "#!/usr/bin/env bash\n" "# SessionStart hook: remind agent to use codebase-memory-mcp tools.\n" "# Installed by codebase-memory-mcp. Fires on startup/resume/clear/compact.\n" "cat << 'REMINDER'\n" @@ -2187,7 +2187,7 @@ static void cbm_install_subagent_reminder_script(const char *home) { * backslashes, or newlines, so the JSON below is valid as written — no * runtime escaping (and no python3/jq dependency) is required. */ (void)fprintf(f, - "#!/bin/bash\n" + "#!/usr/bin/env bash\n" "# SubagentStart hook: tell subagents to use codebase-memory-mcp tools.\n" "# Installed by codebase-memory-mcp. 
Fires when any subagent is spawned.\n" "# SubagentStart injects context via JSON additionalContext, not plain stdout.\n" diff --git a/test-infrastructure/run.sh b/test-infrastructure/run.sh index 32b1826db..2786f2559 100755 --- a/test-infrastructure/run.sh +++ b/test-infrastructure/run.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Local CI — test all platforms before pushing. # # Coverage: diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 3e7edf23c..7cba2a031 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -4669,7 +4669,7 @@ TEST(envscan_secret_value_exclusion) { write_temp_file( tmpdir, "deploy.sh", "#!/bin/bash\n" - "export GH_URL=\"https://ghp_abcdefghijklmnopqrstuvwxyz1234567890@github.com/repo\"\n" + "export GH_URL=\"https://ghp_FAKEFAKEFAKEFAKEFAKEFAKEFAKEFAKEFAKE@github.com/repo\"\n" "export NORMAL_ENDPOINT=\"https://api.example.com/orders\"\n"); cbm_env_binding_t bindings[32]; From c2744264d990c8c54c170ffdab39955d22bb3833 Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Fri, 3 Jul 2026 23:31:14 +0200 Subject: [PATCH 3/3] ci: re-trigger CodeQL gate (gate lookup fixed on main via #820) Signed-off-by: Martin Vogel