From 9973ff5ffc5856ac8f6861f1727c3a11e31163de Mon Sep 17 00:00:00 2001
From: sciapanCA <sciapan@codealive.dev>
Date: Sat, 13 Jun 2026 21:27:20 +0200
Subject: [PATCH 1/2] Add DataSource parameter for artifacts endpoints

---
 src/tests/test_artifact_relationships.py | 92 ++++++++++++++++++++++++
 src/tests/test_fetch_artifacts.py        | 70 ++++++++++++++++++
 src/tests/test_response_transformer.py   | 24 +++++++
 src/tools/artifact_relationships.py      | 38 ++++++++--
 src/tools/fetch_artifacts.py             | 36 +++++++++-
 src/utils/errors.py                      | 11 +++
 src/utils/response_transformer.py        | 14 ++++
 7 files changed, 277 insertions(+), 8 deletions(-)

diff --git a/src/tests/test_artifact_relationships.py b/src/tests/test_artifact_relationships.py
index ad4cbdc..a907294 100644
--- a/src/tests/test_artifact_relationships.py
+++ b/src/tests/test_artifact_relationships.py
@@ -372,6 +372,80 @@ async def test_explicit_profile_maps_correctly(self, mock_get_api_key):
 
         call_args = mock_client.post.call_args
         assert call_args[1]["json"]["profile"] == "InheritanceOnly"
+        # No data_source supplied => omitted from the body.
+        assert "dataSource" not in call_args[1]["json"]
+
+    @pytest.mark.asyncio
+    @patch("tools.artifact_relationships.get_api_key_from_context")
+    async def test_forwards_data_source(self, mock_get_api_key):
+        mock_get_api_key.return_value = "test_key"
+
+        ctx = MagicMock(spec=Context)
+        ctx.debug = AsyncMock()
+        ctx.error = AsyncMock()
+
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "sourceIdentifier": "id",
+            "profile": "CallsOnly",
+            "found": True,
+            "relationships": [],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        mock_context = MagicMock()
+        mock_context.client = mock_client
+        mock_context.base_url = "https://app.codealive.ai"
+        ctx.request_context.lifespan_context = mock_context
+
+        await get_artifact_relationships(
+            ctx=ctx,
+            identifier="id",
+            data_source="repo (main)",
+        )
+
+        assert mock_client.post.call_args[1]["json"]["dataSource"] == "repo (main)"
+
+    @pytest.mark.asyncio
+    @patch("tools.artifact_relationships.get_api_key_from_context")
+    async def test_ambiguous_409_surfaces_candidate_data_sources(self, mock_get_api_key):
+        import httpx
+
+        mock_get_api_key.return_value = "test_key"
+
+        ctx = MagicMock(spec=Context)
+        ctx.debug = AsyncMock()
+        ctx.error = AsyncMock()
+
+        mock_response = MagicMock()
+        mock_response.status_code = 409
+        mock_response.text = (
+            '{"detail": "Identifier matches 2 data sources: '
+            "Name='repo (main)' Id='ds-main', Name='repo (master)' Id='ds-master'\"}"
+        )
+        mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
+            "Conflict", request=MagicMock(), response=mock_response
+        )
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        mock_context = MagicMock()
+        mock_context.client = mock_client
+        mock_context.base_url = "https://app.codealive.ai"
+        ctx.request_context.lifespan_context = mock_context
+
+        with pytest.raises(ToolError) as exc:
+            await get_artifact_relationships(ctx=ctx, identifier="org/repo::path::Symbol")
+
+        message = str(exc.value)
+        assert "409" in message
+        # The candidate data sources from the backend 409 must be surfaced, plus the data_source retry hint.
+        assert "repo (main)" in message and "repo (master)" in message
+        assert "data_source" in message
 
     @pytest.mark.asyncio
     async def test_empty_identifier_raises_tool_error(self):
@@ -446,3 +520,21 @@ async def test_not_found_response_renders_correctly(self, mock_get_api_key):
 
         assert data["found"] is False
         assert "relationships" not in data
+
+    def test_not_found_hint_with_data_source_suggests_retry_or_omit(self):
+        payload = _build_relationships_dict(
+            {"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False},
+            data_source="repo (main)",
+        )
+        hint = payload["hint"]
+        assert "repo (main)" in hint
+        assert "data_source" in hint
+        assert "omit" in hint.lower()
+
+    def test_not_found_hint_without_data_source_is_generic(self):
+        payload = _build_relationships_dict(
+            {"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False},
+        )
+        hint = payload["hint"]
+        assert "data_source" not in hint
+        assert "fresh identifier" in hint
diff --git a/src/tests/test_fetch_artifacts.py b/src/tests/test_fetch_artifacts.py
index 0c10dc0..eb5f38b 100644
--- a/src/tests/test_fetch_artifacts.py
+++ b/src/tests/test_fetch_artifacts.py
@@ -365,6 +365,39 @@ def test_hint_absent_when_no_artifacts_have_content(self):
         assert "<hint>" not in result
 
 
+class TestBuildArtifactsXmlDataSourceMissHint:
+    """When a data_source was supplied but nothing was found, hint to retry or drop it."""
+
+    def test_hint_when_data_source_scoped_returns_nothing(self):
+        data = {"artifacts": [
+            {"identifier": "repo::a.ts::F", "content": None, "contentByteSize": None},
+        ]}
+        result = _build_artifacts_xml(data, data_source="repo (main)")
+        assert "<hint>" in result
+        assert "repo (main)" in result
+        # Guides toward the two recovery moves.
+        assert "data_source" in result
+        assert "omit" in result.lower()
+
+    def test_hint_when_empty_artifacts_and_data_source(self):
+        result = _build_artifacts_xml({"artifacts": []}, data_source="ds-main")
+        assert "ds-main" in result and "<hint>" in result
+
+    def test_no_miss_hint_when_data_source_resolved_content(self):
+        data = {"artifacts": [
+            {"identifier": "repo::a.ts::F", "content": "code", "contentByteSize": 4},
+        ]}
+        result = _build_artifacts_xml(data, data_source="repo (main)")
+        assert "omit data_source" not in result
+
+    def test_no_miss_hint_without_data_source(self):
+        data = {"artifacts": [
+            {"identifier": "repo::a.ts::F", "content": None, "contentByteSize": None},
+        ]}
+        result = _build_artifacts_xml(data)
+        assert "<hint>" not in result
+
+
 @pytest.mark.asyncio
 @patch('tools.fetch_artifacts.get_api_key_from_context')
 async def test_fetch_artifacts_returns_xml(mock_get_api_key):
@@ -476,6 +509,43 @@ async def test_fetch_artifacts_posts_correct_body(mock_get_api_key):
     body = call_args.kwargs["json"]
     assert body["identifiers"] == ["id1", "id2"]
     assert "names" not in body
+    # No data_source supplied => the field is omitted (preserves the 409-on-ambiguity fallback).
+    assert "dataSource" not in body
+
+
+@pytest.mark.asyncio
+@patch('tools.fetch_artifacts.get_api_key_from_context')
+async def test_fetch_artifacts_forwards_data_source(mock_get_api_key):
+    """data_source (Name or Id) is forwarded as the DataSource body field when provided."""
+    mock_get_api_key.return_value = "test_key"
+
+    ctx = MagicMock(spec=Context)
+    ctx.info = AsyncMock()
+    ctx.warning = AsyncMock()
+    ctx.error = AsyncMock()
+
+    mock_response = MagicMock()
+    mock_response.json.return_value = {"artifacts": []}
+    mock_response.raise_for_status = MagicMock()
+
+    mock_client = AsyncMock()
+    mock_client.post.return_value = mock_response
+
+    mock_codealive_context = MagicMock()
+    mock_codealive_context.client = mock_client
+    mock_codealive_context.base_url = "https://app.codealive.ai"
+
+    ctx.request_context.lifespan_context = mock_codealive_context
+    ctx.request_context.headers = {"authorization": "Bearer test_key"}
+
+    await fetch_artifacts(
+        ctx=ctx,
+        identifiers=["id1"],
+        data_source="repo (main)",
+    )
+
+    body = mock_client.post.call_args.kwargs["json"]
+    assert body["dataSource"] == "repo (main)"
 
 
 @pytest.mark.asyncio
diff --git a/src/tests/test_response_transformer.py b/src/tests/test_response_transformer.py
index ad0a359..a1c12af 100644
--- a/src/tests/test_response_transformer.py
+++ b/src/tests/test_response_transformer.py
@@ -294,10 +294,34 @@ def test_data_preservation(self):
         assert first["identifier"] == "CodeAlive-AI/codealive-mcp::src/tools/search.py::codebase_search"
         assert first["contentByteSize"] == 8500
         assert first["description"] == "Main search function"
+        # Data-source identity must be surfaced (not stripped) so the agent can feed it back
+        # as `data_source` to disambiguate a branch-blind identifier.
+        assert first["dataSource"] == {"id": "685b21230e3822f4efa9d073", "name": "codealive-mcp"}
 
         assert second["path"] == "README.md"
         assert second["kind"] == "Chunk"
         assert second["description"] == "Search documentation section"
+        assert second["dataSource"] == {"id": "685b21230e3822f4efa9d073", "name": "codealive-mcp"}
+
+    def test_grep_transform_surfaces_data_source(self):
+        response = {
+            "results": [
+                {
+                    "kind": "File",
+                    "identifier": "owner/repo::src/auth.py",
+                    "location": {"path": "src/auth.py"},
+                    "matchCount": 1,
+                    "matches": [
+                        {"lineNumber": 3, "startColumn": 0, "endColumn": 4, "lineText": "auth"}
+                    ],
+                    "dataSource": {"type": "repository", "id": "ds-main", "name": "repo (main)"},
+                }
+            ]
+        }
+
+        result = transform_grep_response(response)
+
+        assert result["results"][0]["dataSource"] == {"id": "ds-main", "name": "repo (main)"}
 
     def test_grep_transform_preserves_match_previews(self):
         response = {
diff --git a/src/tools/artifact_relationships.py b/src/tools/artifact_relationships.py
index 1e15117..2ffd371 100644
--- a/src/tools/artifact_relationships.py
+++ b/src/tools/artifact_relationships.py
@@ -1,6 +1,6 @@
 """Artifact relationships tool implementation."""
 
-from typing import Any, Dict, List, Literal
+from typing import Any, Dict, List, Literal, Optional
 from urllib.parse import urljoin
 
 import httpx
@@ -37,6 +37,7 @@ async def get_artifact_relationships(
     identifier: str,
     profile: Literal["callsOnly", "inheritanceOnly", "allRelevant", "referencesOnly"] = "callsOnly",
     max_count_per_type: int = 50,
+    data_source: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Retrieve relationship groups for a single artifact by profile.
@@ -84,6 +85,11 @@ async def get_artifact_relationships(
                  - "allRelevant": calls + inheritance only; references are excluded
                  - "referencesOnly": where-used LSP references for non-call usage
         max_count_per_type: Maximum related artifacts per relationship type (1–1000, default 50).
+        data_source: Optional data-source Name or Id used to disambiguate an identifier that
+                     exists in more than one data source. Copy the `dataSource.name` or
+                     `dataSource.id` from a search result. Omit it for normal lookups; if the
+                     source identifier is ambiguous and you omit it, the backend returns a 409
+                     listing the candidate data sources.
 
     Returns:
         A dict with grouped relationships:
@@ -103,6 +109,7 @@ async def get_artifact_relationships(
         "identifier": identifier,
         "profile": profile,
         "max_count_per_type": max_count_per_type,
+        "data_source": data_source,
     }
 
     if not identifier:
@@ -143,6 +150,8 @@ async def get_artifact_relationships(
             "profile": api_profile,
             "maxCountPerType": max_count_per_type,
         }
+        if data_source:
+            body["dataSource"] = data_source
 
         await ctx.debug(f"Fetching {profile} relationships for artifact")
 
@@ -156,7 +165,7 @@ async def get_artifact_relationships(
         log_api_response(response, request_id)
         response.raise_for_status()
 
-        return _build_relationships_dict(response.json())
+        return _build_relationships_dict(response.json(), data_source=data_source)
 
     except (httpx.HTTPStatusError, Exception) as e:
         logger.bind(
@@ -173,15 +182,24 @@ async def get_artifact_relationships(
                     "(2) call semantic_search or grep_search again to get a fresh identifier — the index may have changed, "
                     "(3) check that the artifact is a function/class (relationships are not available for non-symbol artifacts)"
                 ),
+                409: (
+                    "(1) the identifier exists in more than one data source — see the candidate data sources in the Detail above; each one will resolve, "
+                    "(2) retry get_artifact_relationships with data_source set to one candidate's Name or Id; if that data source isn't the one you want, retry with the next candidate, "
+                    "(3) do NOT invent relation results — pick from the listed data sources"
+                ),
             },
         )
 
 
-def _build_relationships_dict(data: dict) -> Dict[str, Any]:
+def _build_relationships_dict(data: dict, data_source: Optional[str] = None) -> Dict[str, Any]:
     """Build a dict representation of an artifact relationships response.
 
     FastMCP serializes the dict via pydantic_core.to_json, which preserves UTF-8 —
     don't reintroduce json.dumps here, it would re-escape non-ASCII identifiers.
+
+    ``data_source`` is the selector the caller passed (if any); when the source is not
+    found it shapes the recovery hint so the agent can retry with another data source
+    or drop the selector.
     """
     raw_source_id = data.get("sourceIdentifier") or ""
     raw_profile = data.get("profile") or ""
@@ -208,9 +226,9 @@ def _build_relationships_dict(data: dict) -> Dict[str, Any]:
         counts = _build_counts(data.get("availableRelationshipCounts"))
         if counts is not None:
             payload["availableRelationshipCounts"] = counts
-        payload["hint"] = _build_relationship_hint(found, mcp_profile, groups, counts)
+        payload["hint"] = _build_relationship_hint(found, mcp_profile, groups, counts, data_source)
     else:
-        payload["hint"] = _build_relationship_hint(found, mcp_profile, [], None)
+        payload["hint"] = _build_relationship_hint(found, mcp_profile, [], None, data_source)
 
     return payload
 
@@ -266,9 +284,19 @@ def _build_relationship_hint(
     profile: str,
     groups: List[Dict[str, Any]],
     counts: Dict[str, int] | None,
+    data_source: Optional[str] = None,
 ) -> str:
     """Give model-facing next-step guidance for graph traversal results."""
     if not found:
+        if data_source:
+            return (
+                f'No relationship data was found for this identifier in data source "{data_source}". '
+                "The identifier may belong to a different data source, or the data_source value may be "
+                "wrong. Try: re-run with data_source set to a different candidate (use the `dataSource` "
+                "name or id from your search results, or call get_data_sources), or omit data_source "
+                "entirely — if the identifier is ambiguous you then get a 409 listing the candidate data "
+                "sources. Otherwise re-run semantic_search or grep_search to get a fresh identifier."
+            )
         return (
             "No relationship data was found for this identifier. Verify that the identifier came from "
             "a recent search/fetch result and points to a symbol-level artifact; otherwise re-run "
diff --git a/src/tools/fetch_artifacts.py b/src/tools/fetch_artifacts.py
index d61ab93..962df26 100644
--- a/src/tools/fetch_artifacts.py
+++ b/src/tools/fetch_artifacts.py
@@ -1,6 +1,6 @@
 """Fetch artifacts tool implementation."""
 
-from typing import List, Union
+from typing import List, Optional, Union
 from urllib.parse import urljoin
 
 import httpx
@@ -17,6 +17,7 @@
 async def fetch_artifacts(
     ctx: Context,
     identifiers: Union[str, List[str]],
+    data_source: Optional[str] = None,
 ) -> str:
     """
     Retrieve the full content of code artifacts by their identifiers.
@@ -39,6 +40,12 @@ async def fetch_artifacts(
                        File:    "my-org/backend::src/services/auth.py"
                        Chunk:   "my-org/backend::README.md::0042"
 
+        data_source: Optional data-source Name or Id used to disambiguate an identifier that
+                     exists in more than one data source. Copy the `dataSource.name` or
+                     `dataSource.id` from the search result you want. Omit it for normal lookups;
+                     if an identifier is ambiguous and you omit it, the backend returns a 409
+                     listing the candidate data sources.
+
     Returns:
         XML with full content and call relationships for each found artifact:
         <artifacts>
@@ -92,6 +99,8 @@ async def fetch_artifacts(
         }
 
         body = {"identifiers": identifiers}
+        if data_source:
+            body["dataSource"] = data_source
 
         await ctx.info(f"Fetching {len(identifiers)} artifact(s)")
 
@@ -112,7 +121,7 @@ async def fetch_artifacts(
         artifacts_data = response.json()
 
         # Build XML output
-        return _build_artifacts_xml(artifacts_data)
+        return _build_artifacts_xml(artifacts_data, data_source=data_source)
 
     except (httpx.HTTPStatusError, Exception) as e:
         # handle_api_error raises ToolError → MCP response gets isError: true
@@ -124,6 +133,11 @@ async def fetch_artifacts(
                     "(2) re-run semantic_search or grep_search to get fresh identifiers — the index may have changed, "
                     "(3) for local repos in your working directory, use Read() on the file path instead"
                 ),
+                409: (
+                    "(1) the identifier exists in more than one data source — see the candidate data sources in the Detail above; each one will resolve, "
+                    "(2) retry fetch_artifacts with data_source set to one candidate's Name or Id; if that data source isn't the one you want, retry with the next candidate, "
+                    "(3) do NOT invent a result — pick from the listed data sources"
+                ),
             },
         )
 
@@ -147,7 +161,7 @@ def _add_line_numbers(content: str, start_line: int = 1) -> str:
     return "\n".join(numbered)
 
 
-def _build_artifacts_xml(data: dict) -> str:
+def _build_artifacts_xml(data: dict, data_source: str | None = None) -> str:
     """Build XML representation of fetched artifacts.
 
     Backend DTO: Identifier (string), Content (string?), ContentByteSize (long?),
@@ -157,16 +171,22 @@ def _build_artifacts_xml(data: dict) -> str:
 
     Content is emitted raw (no HTML escaping) and wrapped between newlines so the
     LLM sees the source code exactly as-is.
+
+    When ``data_source`` was supplied and nothing was found, emit a recovery hint:
+    the identifier may live in a different data source, or the selector may be wrong —
+    so the agent can retry with another data source or drop the selector entirely.
     """
     xml_parts = ["<artifacts>"]
 
     has_any_relationships = False
+    emitted = 0
     artifacts = data.get("artifacts", [])
     for artifact in artifacts:
         content = artifact.get("content")
         if content is None:
             continue
 
+        emitted += 1
         identifier = artifact.get("identifier", "")
         content_byte_size = artifact.get("contentByteSize")
 
@@ -200,6 +220,16 @@ def _build_artifacts_xml(data: dict) -> str:
             'an artifact identifier.</hint>'
         )
 
+    if emitted == 0 and data_source:
+        xml_parts.append(
+            f'  <hint>No artifacts were found in data source "{data_source}". The identifier may '
+            'belong to a different data source, or the data_source value may be wrong. Try: '
+            '(1) re-run fetch_artifacts with data_source set to a different candidate (use the '
+            '`dataSource` name or id from your search results, or call get_data_sources), or '
+            '(2) omit data_source entirely — if the identifier is ambiguous you then get a 409 '
+            'that lists the candidate data sources to choose from.</hint>'
+        )
+
     xml_parts.append("</artifacts>")
     return "\n".join(xml_parts)
 
diff --git a/src/utils/errors.py b/src/utils/errors.py
index 730bf29..22b662a 100644
--- a/src/utils/errors.py
+++ b/src/utils/errors.py
@@ -138,6 +138,17 @@ class _ErrorTemplate:
             "(3) verify any identifiers were returned by a recent semantic_search, grep_search, or codebase_search"
         ),
     ),
+    409: _ErrorTemplate(
+        label="Ambiguous data source (409): the identifier matches more than one data source",
+        retryable=True,
+        retry_window="retry once with data_source set",
+        default_hint=(
+            "(1) read the candidate data sources in the Detail below — every one will resolve, "
+            "(2) retry with data_source set to one candidate's Name or Id; if that data source isn't "
+            "the one you want, retry with the next candidate, "
+            "(3) do not invent a result"
+        ),
+    ),
     422: _ErrorTemplate(
         label="Data source not ready (422): The requested data source is still being indexed",
         retryable=True,
diff --git a/src/utils/response_transformer.py b/src/utils/response_transformer.py
index fd33994..2bdcbce 100644
--- a/src/utils/response_transformer.py
+++ b/src/utils/response_transformer.py
@@ -171,6 +171,20 @@ def _build_result_dict(path: str, result: Dict) -> Dict[str, Any]:
     if result.get("identifier"):
         info["identifier"] = result["identifier"]
 
+    # Surface the data source identity (id + name) so the agent can disambiguate a branch-blind
+    # identifier — the same repo indexed on two branches yields identical identifiers across data
+    # sources. Either value can be passed back as `data_source` to fetch_artifacts /
+    # get_artifact_relationships. (Previously this field was stripped here.)
+    data_source = result.get("dataSource")
+    if isinstance(data_source, dict):
+        ds_info: Dict[str, Any] = {}
+        if data_source.get("id"):
+            ds_info["id"] = data_source["id"]
+        if data_source.get("name"):
+            ds_info["name"] = data_source["name"]
+        if ds_info:
+            info["dataSource"] = ds_info
+
     if result.get("contentByteSize") is not None:
         info["contentByteSize"] = result["contentByteSize"]
 

From e68b73c31c376cb4fea5e3055b2a34e887bdeee8 Mon Sep 17 00:00:00 2001
From: sciapanCA <sciapan@codealive.dev>
Date: Sat, 13 Jun 2026 23:23:50 +0200
Subject: [PATCH 2/2] PR feedback for artifacts endpoints

---
 src/tests/test_artifact_relationships.py | 49 +++++++++++++++++++++---
 src/tests/test_fetch_artifacts.py        | 48 ++++++++++++++++++++---
 src/tests/test_response_transformer.py   |  4 +-
 src/tools/artifact_relationships.py      |  6 +++
 src/tools/fetch_artifacts.py             |  5 +++
 5 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/src/tests/test_artifact_relationships.py b/src/tests/test_artifact_relationships.py
index a907294..ef2a977 100644
--- a/src/tests/test_artifact_relationships.py
+++ b/src/tests/test_artifact_relationships.py
@@ -404,10 +404,47 @@ async def test_forwards_data_source(self, mock_get_api_key):
         await get_artifact_relationships(
             ctx=ctx,
             identifier="id",
-            data_source="repo (main)",
+            data_source="backend",
         )
 
-        assert mock_client.post.call_args[1]["json"]["dataSource"] == "repo (main)"
+        assert mock_client.post.call_args[1]["json"]["dataSource"] == "backend"
+
+    @pytest.mark.asyncio
+    @patch("tools.artifact_relationships.get_api_key_from_context")
+    async def test_whitespace_data_source_omitted(self, mock_get_api_key):
+        """A whitespace-only data_source normalizes to None: not sent to the backend
+        and not echoed in the not-found hint (preserves the 409-on-ambiguity fallback)."""
+        mock_get_api_key.return_value = "test_key"
+
+        ctx = MagicMock(spec=Context)
+        ctx.debug = AsyncMock()
+        ctx.error = AsyncMock()
+
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "sourceIdentifier": "id",
+            "profile": "CallsOnly",
+            "found": False,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        mock_context = MagicMock()
+        mock_context.client = mock_client
+        mock_context.base_url = "https://app.codealive.ai"
+        ctx.request_context.lifespan_context = mock_context
+
+        result = await get_artifact_relationships(
+            ctx=ctx,
+            identifier="id",
+            data_source="   ",
+        )
+
+        assert "dataSource" not in mock_client.post.call_args[1]["json"]
+        # The confusing `... in data source "   "` hint must not appear.
+        assert '"   "' not in result["hint"]
 
     @pytest.mark.asyncio
     @patch("tools.artifact_relationships.get_api_key_from_context")
@@ -424,7 +461,7 @@ async def test_ambiguous_409_surfaces_candidate_data_sources(self, mock_get_api_
         mock_response.status_code = 409
         mock_response.text = (
             '{"detail": "Identifier matches 2 data sources: '
-            "Name='repo (main)' Id='ds-main', Name='repo (master)' Id='ds-master'\"}"
+            "Name='backend' Id='ds-main', Name='backend-legacy' Id='ds-master'\"}"
         )
         mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
             "Conflict", request=MagicMock(), response=mock_response
@@ -444,7 +481,7 @@ async def test_ambiguous_409_surfaces_candidate_data_sources(self, mock_get_api_
         message = str(exc.value)
         assert "409" in message
         # The candidate data sources from the backend 409 must be surfaced, plus the data_source retry hint.
-        assert "repo (main)" in message and "repo (master)" in message
+        assert "backend" in message and "backend-legacy" in message
         assert "data_source" in message
 
     @pytest.mark.asyncio
@@ -524,10 +561,10 @@ async def test_not_found_response_renders_correctly(self, mock_get_api_key):
     def test_not_found_hint_with_data_source_suggests_retry_or_omit(self):
         payload = _build_relationships_dict(
             {"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False},
-            data_source="repo (main)",
+            data_source="backend",
         )
         hint = payload["hint"]
-        assert "repo (main)" in hint
+        assert "backend" in hint
         assert "data_source" in hint
         assert "omit" in hint.lower()
 
diff --git a/src/tests/test_fetch_artifacts.py b/src/tests/test_fetch_artifacts.py
index eb5f38b..46a7d88 100644
--- a/src/tests/test_fetch_artifacts.py
+++ b/src/tests/test_fetch_artifacts.py
@@ -372,9 +372,9 @@ def test_hint_when_data_source_scoped_returns_nothing(self):
         data = {"artifacts": [
             {"identifier": "repo::a.ts::F", "content": None, "contentByteSize": None},
         ]}
-        result = _build_artifacts_xml(data, data_source="repo (main)")
+        result = _build_artifacts_xml(data, data_source="backend")
         assert "<hint>" in result
-        assert "repo (main)" in result
+        assert "backend" in result
         # Guides toward the two recovery moves.
         assert "data_source" in result
         assert "omit" in result.lower()
@@ -387,7 +387,7 @@ def test_no_miss_hint_when_data_source_resolved_content(self):
         data = {"artifacts": [
             {"identifier": "repo::a.ts::F", "content": "code", "contentByteSize": 4},
         ]}
-        result = _build_artifacts_xml(data, data_source="repo (main)")
+        result = _build_artifacts_xml(data, data_source="backend")
         assert "omit data_source" not in result
 
     def test_no_miss_hint_without_data_source(self):
@@ -541,11 +541,49 @@ async def test_fetch_artifacts_forwards_data_source(mock_get_api_key):
     await fetch_artifacts(
         ctx=ctx,
         identifiers=["id1"],
-        data_source="repo (main)",
+        data_source="backend",
     )
 
     body = mock_client.post.call_args.kwargs["json"]
-    assert body["dataSource"] == "repo (main)"
+    assert body["dataSource"] == "backend"
+
+
+@pytest.mark.asyncio
+@patch('tools.fetch_artifacts.get_api_key_from_context')
+async def test_fetch_artifacts_whitespace_data_source_omitted(mock_get_api_key):
+    """A whitespace-only data_source normalizes to None: not sent to the backend
+    and not echoed in the not-found hint (preserves the 409-on-ambiguity fallback)."""
+    mock_get_api_key.return_value = "test_key"
+
+    ctx = MagicMock(spec=Context)
+    ctx.info = AsyncMock()
+    ctx.warning = AsyncMock()
+    ctx.error = AsyncMock()
+
+    mock_response = MagicMock()
+    mock_response.json.return_value = {"artifacts": []}
+    mock_response.raise_for_status = MagicMock()
+
+    mock_client = AsyncMock()
+    mock_client.post.return_value = mock_response
+
+    mock_codealive_context = MagicMock()
+    mock_codealive_context.client = mock_client
+    mock_codealive_context.base_url = "https://app.codealive.ai"
+
+    ctx.request_context.lifespan_context = mock_codealive_context
+    ctx.request_context.headers = {"authorization": "Bearer test_key"}
+
+    result = await fetch_artifacts(
+        ctx=ctx,
+        identifiers=["id1"],
+        data_source="   ",
+    )
+
+    body = mock_client.post.call_args.kwargs["json"]
+    assert "dataSource" not in body
+    # The confusing `... data source "   "` hint must not appear.
+    assert '"   "' not in result
 
 
 @pytest.mark.asyncio
diff --git a/src/tests/test_response_transformer.py b/src/tests/test_response_transformer.py
index a1c12af..52058ca 100644
--- a/src/tests/test_response_transformer.py
+++ b/src/tests/test_response_transformer.py
@@ -314,14 +314,14 @@ def test_grep_transform_surfaces_data_source(self):
                     "matches": [
                         {"lineNumber": 3, "startColumn": 0, "endColumn": 4, "lineText": "auth"}
                     ],
-                    "dataSource": {"type": "repository", "id": "ds-main", "name": "repo (main)"},
+                    "dataSource": {"type": "repository", "id": "ds-main", "name": "backend"},
                 }
             ]
         }
 
         result = transform_grep_response(response)
 
-        assert result["results"][0]["dataSource"] == {"id": "ds-main", "name": "repo (main)"}
+        assert result["results"][0]["dataSource"] == {"id": "ds-main", "name": "backend"}
 
     def test_grep_transform_preserves_match_previews(self):
         response = {
diff --git a/src/tools/artifact_relationships.py b/src/tools/artifact_relationships.py
index 2ffd371..54ab63f 100644
--- a/src/tools/artifact_relationships.py
+++ b/src/tools/artifact_relationships.py
@@ -112,6 +112,12 @@ async def get_artifact_relationships(
         "data_source": data_source,
     }
 
+    # Normalize the optional selector: treat empty/whitespace-only as "no selector"
+    # so we don't send a junk dataSource to the backend or echo it in the not-found hint.
+    # (tool_arguments above intentionally keeps the raw value for exact-invocation logging.)
+    if data_source is not None:
+        data_source = data_source.strip() or None
+
     if not identifier:
         logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning(
             "Tool validation failed: artifact identifier is required"
diff --git a/src/tools/fetch_artifacts.py b/src/tools/fetch_artifacts.py
index 962df26..d8d4487 100644
--- a/src/tools/fetch_artifacts.py
+++ b/src/tools/fetch_artifacts.py
@@ -81,6 +81,11 @@ async def fetch_artifacts(
     # deferred tools, LiveKit agents, etc.) into a proper Python list.
     identifiers = coerce_stringified_list(identifiers)
 
+    # Normalize the optional selector: treat empty/whitespace-only as "no selector"
+    # so we don't send a junk dataSource to the backend or echo it in the not-found hint.
+    if data_source is not None:
+        data_source = data_source.strip() or None
+
     if not identifiers:
         raise ToolError(f"[{_TOOL_NAME}] At least one identifier is required.")