From dcab23c49a21139484926e6bb95ebd8961b617a6 Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Thu, 25 Jun 2026 11:46:39 +0300 Subject: [PATCH 1/7] feat(operation): CORA-owned TransferPort + in-memory double (triage spike) Finalizes the data-transfer design triage with a CORA-shaped seam rather than a Globus transliteration. The verbs are begin/observe/cancel (a non-blocking observe-loop), the state set is Pending/Active/Suspended/ Succeeded/Failed/Cancelled, and a partial move is carried as files_failed>0 on a terminal Failed (no PartiallyFailed enum yet). The in-memory double plus a 2-BM/DMagic scenario show the deciding fact: a transfer is a long-running edge job (it waits through a non-terminal Suspended and folds a partial terminal), not a synchronous conductor step like ComputeStep. Not for merge: no production consumer yet and the build trigger has not fired. TransferPort takes the bare-verb port-suffix carve-out alongside ControlPort/ComputePort. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../adapters/in_memory_transfer_port.py | 160 ++++++++ apps/api/src/cora/operation/ports/__init__.py | 29 ++ .../src/cora/operation/ports/transfer_port.py | 351 ++++++++++++++++++ .../test_port_naming_conventions.py | 5 + .../unit/operation/test_transfer_port.py | 195 ++++++++++ 5 files changed, 740 insertions(+) create mode 100644 apps/api/src/cora/operation/adapters/in_memory_transfer_port.py create mode 100644 apps/api/src/cora/operation/ports/transfer_port.py create mode 100644 apps/api/tests/unit/operation/test_transfer_port.py diff --git a/apps/api/src/cora/operation/adapters/in_memory_transfer_port.py b/apps/api/src/cora/operation/adapters/in_memory_transfer_port.py new file mode 100644 index 00000000000..93898687905 --- /dev/null +++ b/apps/api/src/cora/operation/adapters/in_memory_transfer_port.py @@ -0,0 +1,160 @@ +"""In-memory `TransferPort` test double for unit / scenario tiers. + +Dict-backed, no substrate. 
A test seeds the observation progression a begun +move will report; the engine-under-test calls `begin` / `observe` / `cancel` +against the same instance and walks the seeded snapshots toward a terminal. +This is the only `TransferPort` adapter that exists: there is no production +substrate adapter yet, because the build trigger has not fired (see +`cora.operation.ports.transfer_port`). The fake serves the triage that asks +whether a transfer is a conductor step or a long-running edge job. + +## Seeding model + +A move reports a SEQUENCE of `TransferProgress` snapshots, one per `observe`, +clamping on the last. That models the real shape the synchronous compute fake +cannot: a move that is `Active` for several polls before a terminal, or that +goes `Suspended` mid-flight and waits. `set_next_progression` seeds the whole +sequence the next begun move yields (FIFO across begins); `set_next_terminal` +is the one-snapshot convenience for a move that is observed at its terminal +straight away. With nothing seeded a move succeeds on first observe, so a +happy-path test needs no seeding. `set_next_begin_error` seeds a substrate +refusal the next `begin` raises. + +## Cancellation + +`cancel` flips a non-terminal move so subsequent `observe` reports `Cancelled`, +carrying the last seen counts; cancelling an already-terminal move is a no-op, +matching the port contract. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from cora.operation.ports.transfer_port import ( + TransferHandle, + TransferProgress, + TransferRequest, + TransferState, +) + + +@dataclass +class _Movement: + """The seeded observation sequence bound to one begun handle.""" + + request: TransferRequest + snapshots: list[TransferProgress] + cursor: int = 0 + cancelled: bool = False + last_returned: TransferProgress | None = None + + +@dataclass +class InMemoryTransferPort: + """Process-local dict adapter for `TransferPort`. 
+ + See module docstring for the seeding model and cancellation behaviour. + """ + + _progressions: list[tuple[TransferProgress, ...]] = field( + default_factory=list[tuple[TransferProgress, ...]] + ) + _begin_errors: list[Exception] = field(default_factory=list[Exception]) + _movements: dict[TransferHandle, _Movement] = field( + default_factory=dict[TransferHandle, _Movement] + ) + _counter: int = 0 + _closed: bool = False + + def set_next_progression(self, snapshots: tuple[TransferProgress, ...]) -> None: + """Seed the observation sequence the next begun move yields (FIFO). + + Each `observe` returns the next snapshot, clamping on the last, so a + sequence like `(Active, Active, Succeeded)` models a move polled to a + terminal. An empty sequence is rejected (a move always reports + something). + """ + if not snapshots: + msg = "a transfer progression needs at least one snapshot" + raise ValueError(msg) + self._progressions.append(snapshots) + + def set_next_terminal( + self, + state: TransferState, + *, + bytes_moved: int = 0, + files_total: int | None = None, + files_moved: int = 0, + files_skipped: int = 0, + files_failed: int = 0, + detail: str | None = None, + ) -> None: + """Seed a single-snapshot progression observed at `state` straight away. + + Convenience over `set_next_progression` for a move a test wants to read + at its terminal (or any single state) without a multi-step sequence. + """ + self.set_next_progression( + ( + TransferProgress( + state=state, + bytes_moved=bytes_moved, + files_total=files_total, + files_moved=files_moved, + files_skipped=files_skipped, + files_failed=files_failed, + detail=detail, + ), + ) + ) + + def set_next_begin_error(self, error: Exception) -> None: + """Seed an exception the next `begin` raises (FIFO), e.g. 
a refusal.""" + self._begin_errors.append(error) + + async def begin(self, request: TransferRequest) -> TransferHandle: + if self._begin_errors: + raise self._begin_errors.pop(0) + self._counter += 1 + handle = TransferHandle(f"inmem-transfer-{self._counter}") + if self._progressions: + snapshots = list(self._progressions.pop(0)) + else: + snapshots = [TransferProgress(state=TransferState.SUCCEEDED)] + self._movements[handle] = _Movement(request=request, snapshots=snapshots) + return handle + + async def observe(self, handle: TransferHandle) -> TransferProgress: + movement = self._movements[handle] + if movement.cancelled: + base = movement.last_returned or movement.snapshots[0] + snapshot = TransferProgress( + state=TransferState.CANCELLED, + bytes_moved=base.bytes_moved, + files_total=base.files_total, + files_moved=base.files_moved, + files_skipped=base.files_skipped, + files_failed=base.files_failed, + ) + movement.last_returned = snapshot + return snapshot + snapshot = movement.snapshots[movement.cursor] + if movement.cursor < len(movement.snapshots) - 1: + movement.cursor += 1 + movement.last_returned = snapshot + return snapshot + + async def cancel(self, handle: TransferHandle) -> None: + movement = self._movements[handle] + if movement.last_returned is not None and movement.last_returned.state.is_terminal: + return + movement.cancelled = True + + async def aclose(self) -> None: + """No-op for the in-memory double; idempotent.""" + self._closed = True + + +__all__ = ["InMemoryTransferPort"] diff --git a/apps/api/src/cora/operation/ports/__init__.py b/apps/api/src/cora/operation/ports/__init__.py index b39db5c57aa..8da090b06fc 100644 --- a/apps/api/src/cora/operation/ports/__init__.py +++ b/apps/api/src/cora/operation/ports/__init__.py @@ -16,6 +16,11 @@ submission (submit / await / fetch artifact), distilled from a single local-process adapter. A routing registry is deferred to the second real substrate, exactly as ControlPort earned its registry. 
+ +`TransferPort` is the data-movement sibling: domain-shaped byte movement +(begin / observe / cancel), non-blocking because a transfer can be long +running, can suspend mid-flight, and can finish partially. It ships with +a test double only, pending the build trigger; see its module docstring. """ from cora.operation.ports.compute_port import ( @@ -52,6 +57,19 @@ ProcedureActivityLookup, ProcedureActivityRecency, ) +from cora.operation.ports.transfer_port import ( + NoRouteForLocationError, + TransferAccessDeniedError, + TransferEndpointUnreachableError, + TransferHandle, + TransferIntegrityError, + TransferPort, + TransferProgress, + TransferRejectedError, + TransferRequest, + TransferState, + TransferTimeoutError, +) __all__ = [ "ArtifactNotFoundError", @@ -77,7 +95,18 @@ "MeasurementKind", "MeasurementNotFoundError", "NoAdapterForAddressError", + "NoRouteForLocationError", "ProcedureActivityLookup", "ProcedureActivityRecency", "Quality", + "TransferAccessDeniedError", + "TransferEndpointUnreachableError", + "TransferHandle", + "TransferIntegrityError", + "TransferPort", + "TransferProgress", + "TransferRejectedError", + "TransferRequest", + "TransferState", + "TransferTimeoutError", ] diff --git a/apps/api/src/cora/operation/ports/transfer_port.py b/apps/api/src/cora/operation/ports/transfer_port.py new file mode 100644 index 00000000000..d938840bf1e --- /dev/null +++ b/apps/api/src/cora/operation/ports/transfer_port.py @@ -0,0 +1,351 @@ +"""TransferPort: CORA's domain-shaped seam for moving a dataset's bytes. + +`TransferPort` is the async Protocol the conducting engine uses to begin a +bulk byte movement, observe its progress, and cancel it. It is owned and +shaped by CORA, from what CORA's own domain needs: the engine has to start a +move, watch a possibly long-running move toward a terminal, and learn whether +every byte arrived so the Data BC can mark the landed `Distribution` verified. 
+Concrete substrates (a Globus transfer task, the APS Data Management DAQ +service, an S3 copy) are OUTSIDE adapters that get tested against this shape +later; they do not define it. An adapter translates the substrate's wire +vocabulary into the CORA-owned value types below, the same ACL posture the +control and compute adapters take. + +This is the data-movement member of CORA's actuation seam, alongside +`ControlPort` (drive hardware) and `ComputePort` (drive a compute job). All +three are seams the engine reaches across to actuate the outside world; per +the seam model, moving bytes through Globus is the same posture as driving a +motor through EPICS, not a competition with it. The three are siblings in +role, not copies in shape: a transfer is neither value-IO nor a single job, +so its surface is its own. + +## Earned, not minted + +There is no production consumer yet. This port exists to finalize the design +triage (is a transfer a step the conductor walks, or a long-running edge job +with its own lifecycle?), and ships with a test double only. A real adapter +(Globus first) and the executing aggregate land when the build trigger fires: +a promote or publish rule that blocks on a transfer outcome, an in-path +substrate with a native partial terminal, or a custody chain the existing +`Attestation` path cannot carry. Until then this is a spike, not a merge. + +## Why `begin` / `observe` / `cancel`, not blocking submit-and-await + +`ComputePort` blocks (`submit` then `await_terminal_state`) because a compute +job is bounded: a reconstruction runs for minutes and the engine can hold the +call open. A transfer is different in kind. It can be large, slow, and partial: +a sync of an experiment directory may move for hours, may stall waiting for a +credential to be renewed, and may finish with most files moved but a few +failed. 
So the engine does not block on a transfer; it begins one, gets back +an opaque handle, and observes that handle on its own cadence until the +progress reports a terminal state. That non-blocking begin-then-observe shape +is the whole reason a transfer wants a long-running edge job rather than a +synchronous step. + +## Out of scope (deferred, each with its trigger) + +- A routing registry / multi-substrate dispatch. Triggers at the second real + adapter, exactly as ControlPort earned its registry from a third substrate. +- A `PartiallyFailed` terminal state. The first real substrate (Globus) has no + native partial terminal: a sync that skips unchanged files still ends + succeeded, and a genuine partial appears only as a failed terminal carrying + a non-zero failed-file count. So partial-ness is carried on `TransferProgress` + (a terminal `Failed` with `files_failed > 0` and `files_moved > 0`), not as + an enum value, until an adapter with a native partial terminal earns it. +- Resume of an interrupted transfer. Does not generalize across substrates + (a directory sync re-derives its own delta; a single-file move restarts); + capability-flagged when a substrate needs it. + +## Actuation kind + +`ActuationKind` (Physical / Simulated / Hybrid) is deliberately NOT on this +surface. It is a property of the ROUTE a move is dispatched to, not of the +port, so the engine observes it from the routing table the same way the +control path does, never from the transfer adapter class. A rehearsal move on +a simulated route taints its output exactly as a simulated control episode +does. + +## Integrity + +`verify_on_arrival` asks the substrate to self-check that the bytes that +landed match the source and re-move on mismatch. It is a convenience that +fails a bad move early. It does NOT replace CORA's authoritative custody +record: the landed `Distribution` becomes verified only through the Data BC's +`Attestation` path (`ChecksumVerifier`), which walks the bytes CORA can see. 
+ +## Exceptions + +Six exception families describe the move's failure modes. `TransferPort` is +not REST-accessible; the executing edge job captures these as event-payload +metadata per the non-determinism principle, never as HTTP errors. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import StrEnum +from typing import NewType, Protocol, runtime_checkable + +TransferHandle = NewType("TransferHandle", str) +"""Opaque substrate handle to one begun move, returned by `begin`. + +The substrate mints it (a Globus task id, a DM DAQ directory key, an S3 copy +token); the engine treats it as an opaque correlation key it passes back to +`observe` and `cancel`, and snapshots onto the edge job's event for audit. A +separate CORA-supplied `idempotency_key` on the request (not this handle) is +what makes a retried `begin` safe. +""" + + +class TransferState(StrEnum): + """The coarse lifecycle of one move, as CORA needs to drive its edge job. + + Adapters MAP a substrate's own statuses into these; the set is CORA's, not + any substrate's. There are two in-flight values, one intervention value, and + three terminals. + + `Pending`: begun and accepted, not yet observed moving. + `Active`: bytes are moving. + `Suspended`: stalled and will not continue without intervention (the + canonical cause is an expired credential on a long sync). NOT a + terminal: a renewed credential returns it to `Active`. + `Succeeded`: the move finished and every in-scope byte arrived. A sync that + skipped unchanged files still ends here. + `Failed`: the move reached a terminal without full success. This carries + BOTH the total-failure case (`files_moved == 0`) and the partial case + (`files_moved > 0` and `files_failed > 0`); the counts on + `TransferProgress` tell them apart. A caller-issued `cancel` also lands + here unless the substrate distinguishes it (see `Cancelled`). + `Cancelled`: the move stopped because the engine asked it to. 
Distinct from + `Failed` so an operator reads "we stopped this" differently from "this + broke," for substrates that report the difference. + """ + + PENDING = "Pending" + ACTIVE = "Active" + SUSPENDED = "Suspended" + SUCCEEDED = "Succeeded" + FAILED = "Failed" + CANCELLED = "Cancelled" + + @property + def is_terminal(self) -> bool: + """True for the three end states; False for Pending / Active / Suspended. + + Suspended is explicitly non-terminal: it is the long-sync intervention + state the engine waits through, not an end the edge job folds on. + """ + return self in {TransferState.SUCCEEDED, TransferState.FAILED, TransferState.CANCELLED} + + +@dataclass(frozen=True) +class TransferRequest: + """What CORA asks a substrate to move, in CORA's own vocabulary. + + `source` and `destination` are CORA location strings the routed adapter + parses (a Globus `endpoint_id:/path`, a DM `@host:/path`, an `s3://` + URI); at v1 they are plain strings, promoted to a typed location sum at + the second-substrate trigger, the same path `ControlPort.address` reserves. + `recursive` declares the source is a directory tree, not a single file. + `skip_unchanged` asks the substrate not to re-move bytes already present and + identical at the destination (a re-sync of an experiment directory). + `verify_on_arrival` asks the substrate to self-check integrity and re-move + on mismatch (see module docstring: this does not replace the Attestation + record). `label` is an operator-facing name for the move. `idempotency_key` + is a CORA-minted key that lets a retried `begin` dedupe at the substrate + instead of starting a duplicate move, so a replay stays deterministic. + """ + + source: str + destination: str + recursive: bool = False + skip_unchanged: bool = False + verify_on_arrival: bool = False + label: str | None = None + idempotency_key: str | None = None + + +@dataclass(frozen=True) +class TransferProgress: + """A snapshot of one move, returned by `observe`. 
+ + Carries the lifecycle `state` plus the coarse counts CORA needs to fold + onto its edge job and show an operator. `files_total` is None until the + substrate has finished scanning (it can grow as a recursive move discovers + directories). `files_skipped` counts unchanged files a `skip_unchanged` + move did not re-move. `files_failed` is the partial signal: on a terminal + `Failed` state, `files_failed > 0` with `files_moved > 0` is a partial + move, while `files_moved == 0` is a total failure. `detail` carries the + substrate's human-readable sub-status (a stall reason, a fatal-error + summary) for the operator-facing record. + """ + + state: TransferState + bytes_moved: int = 0 + files_total: int | None = None + files_moved: int = 0 + files_skipped: int = 0 + files_failed: int = 0 + detail: str | None = None + + @property + def is_partial(self) -> bool: + """True for a terminal `Failed` that nonetheless moved some files. + + The carried-on-counts stand-in for a native `PartiallyFailed` terminal + (deferred until a substrate reports one). Lets a consumer separate + "moved most of it, some failed" from "moved nothing." + """ + return self.state is TransferState.FAILED and self.files_moved > 0 and self.files_failed > 0 + + +class TransferEndpointUnreachableError(Exception): + """The source or destination substrate could not be reached at all. + + A configuration or environment gap (endpoint offline, host unresolvable, + credentials absent), not a rejection of the specific move. Distinct from + `TransferRejectedError`: the substrate is unreachable, not refusing. + """ + + def __init__(self, endpoint: str) -> None: + super().__init__(f"Transfer endpoint {endpoint!r} unreachable") + self.endpoint = endpoint + + +class TransferRejectedError(Exception): + """The substrate refused the move at begin time. + + A malformed path, a quota breach, an unsupported source/destination pair. + The substrate is reachable; it is this move it would not accept. 
+ """ + + def __init__(self, reason: str) -> None: + super().__init__(f"Transfer rejected: {reason}") + self.reason = reason + + +class TransferAccessDeniedError(Exception): + """The substrate denied access to a location. + + Credentials present but insufficient for the source read or destination + write. Distinct from `TransferEndpointUnreachableError` (no credentials / + no endpoint) so an operator separates "you may not" from "it is not there." + """ + + def __init__(self, endpoint: str) -> None: + super().__init__(f"Transfer access denied for {endpoint!r}") + self.endpoint = endpoint + + +class TransferIntegrityError(Exception): + """A verify-on-arrival check found landed bytes that do not match the source. + + Raised only when the substrate definitively reports an integrity failure it + will not resolve by re-moving. The edge job records a failed move; no + `Distribution` is marked verified off mismatched bytes. + """ + + def __init__(self, handle: TransferHandle, location: str) -> None: + super().__init__(f"Transfer {handle!r} integrity check failed at {location!r}") + self.handle = handle + self.location = location + + +class TransferTimeoutError(Exception): + """An adapter's own status call exceeded its ceiling before answering. + + The adapter giving up on a substrate that never reported back, distinct + from a move that is legitimately slow (that is an `Active` observation the + engine keeps watching). Carries the breached ceiling so logs separate "we + stopped asking" from "the move ran long." + """ + + def __init__(self, handle: TransferHandle, timeout_s: float) -> None: + super().__init__(f"Transfer {handle!r} status call exceeded {timeout_s}s") + self.handle = handle + self.timeout_s = timeout_s + + +class NoRouteForLocationError(Exception): + """No registered adapter route matches a request location. + + Almost always a configuration gap (a new endpoint or scheme without a + matching route). 
The mirror of the control path's no-adapter-for-address + miss; lives next to the port so a routing layer can raise it. + """ + + def __init__(self, location: str) -> None: + super().__init__(f"Transfer location {location!r} has no matching adapter") + self.location = location + + +@runtime_checkable +class TransferPort(Protocol): + """CORA-owned seam for moving a dataset's bytes across a substrate. + + Substrate-agnostic. Concrete adapters (a future `GlobusTransferPort`, + `DmDaqTransferPort`, `S3TransferPort`) implement the wire details and map + their statuses into `TransferState`; the engine never touches a + substrate-specific symbol. Non-blocking by design: `begin` returns a handle + and the engine `observe`s it to a terminal on its own cadence, because a + transfer can be long-running, can suspend mid-flight, and can finish + partially. A routing registry, a native partial terminal, and resume are + deferred to their own triggers (see module docstring). + """ + + async def begin(self, request: TransferRequest) -> TransferHandle: + """Begin the move described by `request` and return its handle. + + Returns as soon as the substrate accepts the move (a submitted task, a + started sync). Raises `TransferRejectedError` if the substrate refuses + the request, `TransferEndpointUnreachableError` if a substrate is + unreachable, `TransferAccessDeniedError` on a permission denial, or + `NoRouteForLocationError` if no adapter route matches a location. + """ + ... + + async def observe(self, handle: TransferHandle) -> TransferProgress: + """Return a current `TransferProgress` snapshot for `handle`. + + Non-blocking: reports wherever the move is now (`Pending` / `Active` / + `Suspended` or a terminal), so the engine polls on its own cadence + rather than holding a call open. 
Raises + `TransferEndpointUnreachableError` or `TransferAccessDeniedError` if the + status cannot be read, `TransferTimeoutError` if the adapter's status + call itself times out, or `TransferIntegrityError` when the substrate + definitively reports a verify-on-arrival mismatch. + """ + ... + + async def cancel(self, handle: TransferHandle) -> None: + """Ask the substrate to stop the move identified by `handle`. + + Best-effort and idempotent: cancelling an already-terminal move is a + no-op. A successfully cancelled move is observed as `Cancelled` (or + `Failed` on a substrate that does not distinguish the two). + """ + ... + + async def aclose(self) -> None: + """Release any substrate resources; idempotent. + + Provided so composition code can `aclose()` any `TransferPort` without + branching on type, the same affordance the control and compute ports + offer. A dict-backed test double is a no-op. + """ + ... + + +__all__ = [ + "NoRouteForLocationError", + "TransferAccessDeniedError", + "TransferEndpointUnreachableError", + "TransferHandle", + "TransferIntegrityError", + "TransferPort", + "TransferProgress", + "TransferRejectedError", + "TransferRequest", + "TransferState", + "TransferTimeoutError", +] diff --git a/apps/api/tests/architecture/test_port_naming_conventions.py b/apps/api/tests/architecture/test_port_naming_conventions.py index 37ff5faeeb9..73cf0444bad 100644 --- a/apps/api/tests/architecture/test_port_naming_conventions.py +++ b/apps/api/tests/architecture/test_port_naming_conventions.py @@ -57,6 +57,11 @@ "graceful role noun 'JobRunner' is reserved for the multi-substrate " "registry hoist, so it stays ComputePort (sibling of ControlPort)" ), + "TransferPort": ( + "data-movement seam; 'Transfer' is a bare verb and the graceful agent " + "noun ('Mover' / 'Courier') reads worse than the bare-verb sibling pair, " + "so it stays TransferPort (sibling of ControlPort / ComputePort)" + ), "SignaturePort": "stripping to 'Signature' collides with the 
Signature value object", "PublishPort": "federation publish seam; 'Publish' is a bare verb, no graceful agent noun", "PullPort": "federation pull seam; 'Pull' is a bare verb, no graceful agent noun", diff --git a/apps/api/tests/unit/operation/test_transfer_port.py b/apps/api/tests/unit/operation/test_transfer_port.py new file mode 100644 index 00000000000..41a0923dc03 --- /dev/null +++ b/apps/api/tests/unit/operation/test_transfer_port.py @@ -0,0 +1,195 @@ +"""Behavioural triage scenario for `TransferPort` + `InMemoryTransferPort`. + +Grounds the design triage (is a transfer a conductor step, or a long-running +edge job?) in the real shape of a 2-BM data sync. DMagic, the tool 2-BM uses +today, syncs an experiment's raw directory and its reconstructed (`_rec`) +sibling to a managed store; it starts the move and lets it run, it does not +block on it. These tests drive the same begin-observe-until-terminal loop an +edge job would run, and exercise the cases that decide the triage: a move that +is polled across several `Active` observations, a re-sync that skips unchanged +files, a partial move, a credential-expiry suspension mid-sync, and cancel. + +The contrast with the compute path is the finding: a compute job is awaited in +one blocking call, but a transfer is begun once and observed to a terminal on +the engine's own cadence, through a non-terminal `Suspended`, which is why it +wants a long-running edge job rather than a synchronous step. 
+""" + +import pytest + +from cora.operation.adapters.in_memory_transfer_port import InMemoryTransferPort +from cora.operation.ports.transfer_port import ( + TransferHandle, + TransferPort, + TransferProgress, + TransferRejectedError, + TransferRequest, + TransferState, +) + +_SOURCE = "aps-dm:/gdata/dm/2BM/exp123/" +_DESTINATION = "globus-archive:/archive/2BM/exp123/" +_RAW_SYNC = TransferRequest(source=_SOURCE, destination=_DESTINATION, recursive=True) +_RESYNC = TransferRequest( + source=_SOURCE, destination=_DESTINATION, recursive=True, skip_unchanged=True +) + +_MAX_POLLS = 20 + + +async def _drive_to_terminal( + port: TransferPort, handle: TransferHandle +) -> tuple[TransferProgress, list[TransferState]]: + """Poll `observe` like an edge job would: until a terminal, waiting through Suspended. + + Returns the terminal snapshot plus the ordered states seen, so a test can + assert the move passed through (for example) a `Suspended` observation + before terminating. Bounded so a misseeded test fails loudly instead of + looping forever. 
+ """ + seen: list[TransferState] = [] + for _ in range(_MAX_POLLS): + progress = await port.observe(handle) + seen.append(progress.state) + if progress.state.is_terminal: + return progress, seen + msg = f"transfer {handle!r} never reached a terminal within {_MAX_POLLS} polls" + raise AssertionError(msg) + + +@pytest.mark.unit +async def test_directory_sync_polled_across_active_observations_succeeds() -> None: + port = InMemoryTransferPort() + port.set_next_progression( + ( + TransferProgress(state=TransferState.PENDING), + TransferProgress(state=TransferState.ACTIVE, files_total=400, files_moved=120), + TransferProgress(state=TransferState.ACTIVE, files_total=400, files_moved=380), + TransferProgress( + state=TransferState.SUCCEEDED, files_total=400, files_moved=400, bytes_moved=8_000 + ), + ) + ) + handle = await port.begin(_RAW_SYNC) + terminal, seen = await _drive_to_terminal(port, handle) + + assert terminal.state is TransferState.SUCCEEDED + assert terminal.files_moved == 400 + assert TransferState.ACTIVE in seen + assert seen[-1] is TransferState.SUCCEEDED + + +@pytest.mark.unit +async def test_resync_of_rec_directory_skips_unchanged_and_still_succeeds() -> None: + port = InMemoryTransferPort() + port.set_next_terminal( + TransferState.SUCCEEDED, files_total=400, files_moved=12, files_skipped=388 + ) + handle = await port.begin(_RESYNC) + progress = await port.observe(handle) + + assert progress.state is TransferState.SUCCEEDED + assert progress.files_skipped == 388 + assert progress.is_partial is False + + +@pytest.mark.unit +async def test_partial_move_is_failed_terminal_carrying_a_failed_count() -> None: + port = InMemoryTransferPort() + port.set_next_terminal( + TransferState.FAILED, + files_total=400, + files_moved=397, + files_failed=3, + detail="3 subtasks failed", + ) + handle = await port.begin(_RAW_SYNC) + progress = await port.observe(handle) + + assert progress.state is TransferState.FAILED + assert progress.is_partial is True + assert 
progress.files_failed == 3 + assert progress.files_moved == 397 + + +@pytest.mark.unit +async def test_total_failure_is_failed_terminal_that_is_not_partial() -> None: + port = InMemoryTransferPort() + port.set_next_terminal(TransferState.FAILED, files_total=400, files_moved=0, files_failed=400) + handle = await port.begin(_RAW_SYNC) + progress = await port.observe(handle) + + assert progress.state is TransferState.FAILED + assert progress.is_partial is False + + +@pytest.mark.unit +async def test_credential_expiry_suspends_mid_sync_then_resumes_to_succeeded() -> None: + port = InMemoryTransferPort() + port.set_next_progression( + ( + TransferProgress(state=TransferState.ACTIVE, files_total=400, files_moved=200), + TransferProgress( + state=TransferState.SUSPENDED, files_moved=200, detail="credential expired" + ), + TransferProgress(state=TransferState.ACTIVE, files_total=400, files_moved=350), + TransferProgress(state=TransferState.SUCCEEDED, files_total=400, files_moved=400), + ) + ) + handle = await port.begin(_RAW_SYNC) + terminal, seen = await _drive_to_terminal(port, handle) + + assert TransferState.SUSPENDED in seen + assert TransferState.SUSPENDED.is_terminal is False + assert terminal.state is TransferState.SUCCEEDED + + +@pytest.mark.unit +async def test_substrate_refusal_raises_transfer_rejected_at_begin() -> None: + port = InMemoryTransferPort() + port.set_next_begin_error(TransferRejectedError("destination quota exceeded")) + with pytest.raises(TransferRejectedError): + await port.begin(_RAW_SYNC) + + +@pytest.mark.unit +async def test_cancel_makes_subsequent_observation_report_cancelled() -> None: + port = InMemoryTransferPort() + port.set_next_progression( + ( + TransferProgress(state=TransferState.ACTIVE, files_total=400, files_moved=120), + TransferProgress(state=TransferState.ACTIVE, files_total=400, files_moved=260), + ) + ) + handle = await port.begin(_RAW_SYNC) + in_flight = await port.observe(handle) + assert in_flight.state is 
TransferState.ACTIVE + + await port.cancel(handle) + after = await port.observe(handle) + assert after.state is TransferState.CANCELLED + assert after.files_moved == 120 + + +@pytest.mark.unit +async def test_cancel_of_already_terminal_move_is_a_noop() -> None: + port = InMemoryTransferPort() + port.set_next_terminal(TransferState.SUCCEEDED, files_total=400, files_moved=400) + handle = await port.begin(_RAW_SYNC) + assert (await port.observe(handle)).state is TransferState.SUCCEEDED + + await port.cancel(handle) + assert (await port.observe(handle)).state is TransferState.SUCCEEDED + + +@pytest.mark.unit +async def test_in_memory_double_satisfies_the_transfer_port_protocol() -> None: + port = InMemoryTransferPort() + assert isinstance(port, TransferPort) + + +@pytest.mark.unit +async def test_aclose_is_idempotent_noop() -> None: + port = InMemoryTransferPort() + await port.aclose() + await port.aclose() From 183bac71e3a9d774e0ff0464a9d9ac7005fb6139 Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Thu, 25 Jun 2026 11:52:08 +0300 Subject: [PATCH 2/7] feat(operation): GlobusTransferPort adapter over globus-sdk (triage spike) The first real TransferPort substrate, validating the CORA-owned shape against an outside adapter rather than a fake alone. It takes an already-authorized globus_sdk TransferClient by injection (the OAuth2 dance stays a composition-root concern), builds a TransferData submission, and maps Globus task status into TransferState (INACTIVE -> Suspended, a FAILED task with subtasks_failed>0 -> the partial signal). Globus calls run in asyncio.to_thread since the client is synchronous. Error mapping: NetworkError -> EndpointUnreachable; GlobusAPIError dispatched on http_status -> AccessDenied / EndpointUnreachable / Rejected. Adds globus-sdk>=3,<4 (resolved 3.65) as a hard dep, following the aioca/p4p substrate-adapter convention. Unit-tested against a fake TransferClient; NOT run against a live Globus endpoint (needs credentials + two collections). 
Still a spike: no production consumer until the build trigger fires. Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/api/pyproject.toml | 12 + .../adapters/globus_transfer_port.py | 234 +++++++++++++++ .../operation/test_globus_transfer_port.py | 282 ++++++++++++++++++ apps/api/uv.lock | 16 + 4 files changed, 544 insertions(+) create mode 100644 apps/api/src/cora/operation/adapters/globus_transfer_port.py create mode 100644 apps/api/tests/unit/operation/test_globus_transfer_port.py diff --git a/apps/api/pyproject.toml b/apps/api/pyproject.toml index dab3c2e2450..8a9d961c899 100644 --- a/apps/api/pyproject.toml +++ b/apps/api/pyproject.toml @@ -75,6 +75,18 @@ dependencies = [ # ControlTimeoutError; `p4p.client.asyncio.Disconnected` -> # ControlNotConnectedError; `RemoteError` -> ControlWriteRejectedError. "p4p>=4,<5", + # globus-sdk (TransferPort arc): production Globus Transfer client used by + # `GlobusTransferPort` at `cora/operation/adapters/`, the first real + # `TransferPort` substrate. The adapter takes an already-authorized + # `TransferClient` by injection (the OAuth2 dance is a composition-root + # concern, not the adapter's), builds a `TransferData` payload, and maps + # Globus task status (ACTIVE / INACTIVE / SUCCEEDED / FAILED) into + # `TransferState`; INACTIVE -> Suspended (credential expiry), a FAILED task + # carrying subtasks_failed > 0 -> the partial signal. Error mapping: + # `GlobusAPIError` dispatched on `.http_status` -> Rejected / AccessDenied / + # EndpointUnreachable; `NetworkError` / connection / timeout -> the + # transport families. Pin <4 to flag a future major SDK shift in CI. 
+ "globus-sdk>=3,<4", ] [dependency-groups] diff --git a/apps/api/src/cora/operation/adapters/globus_transfer_port.py b/apps/api/src/cora/operation/adapters/globus_transfer_port.py new file mode 100644 index 00000000000..4ffbc3af9ad --- /dev/null +++ b/apps/api/src/cora/operation/adapters/globus_transfer_port.py @@ -0,0 +1,234 @@ +"""GlobusTransferPort: production `TransferPort` over the Globus Transfer service. + +The first real substrate behind CORA's `TransferPort`. It takes an +already-authorized Globus `TransferClient` by injection (the OAuth2 dance is a +composition-root concern, not the adapter's), translates a `TransferRequest` +into a Globus `TransferData` submission, and maps Globus task status back into +CORA's `TransferState`. CORA owns the seam; this adapter is an ACL that speaks +Globus on one side and CORA vocabulary on the other. + +The Globus client is synchronous (it speaks HTTP via `requests`), so every +call is run in a worker thread (`asyncio.to_thread`) to keep the async port +contract: the event loop is never blocked on a network round-trip. + +## Status mapping + +Globus reports exactly four task statuses; CORA's `TransferState` adds the +Pending/Cancelled framing the engine needs: + +- `ACTIVE` -> `Active` +- `INACTIVE` -> `Suspended` (Globus only enters INACTIVE on credential + expiry, which is exactly CORA's intervention-required state) +- `SUCCEEDED` -> `Succeeded` +- `FAILED` -> `Failed` + +A partial move has no native Globus terminal: a sync that skips unchanged +files still ends `SUCCEEDED`, and a genuine partial ends `FAILED` carrying +`subtasks_failed > 0` alongside `files_transferred > 0`. That is exactly the +counts-on-`TransferProgress` partial signal the port models, so no enum value +is needed. A caller-issued `cancel` lands the Globus task in `FAILED`; Globus +does not distinguish it from a fault on the status alone, which the port +explicitly allows ("Failed on a substrate that does not distinguish the two"). 
+ +## Error mapping + +Globus transport failures (`NetworkError` and its connection / timeout +subclasses) become `TransferEndpointUnreachableError`: the substrate did not +answer. Globus API errors (`GlobusAPIError`) are dispatched on `http_status`: +401 / 403 -> `TransferAccessDeniedError`, 5xx -> `TransferEndpointUnreachableError`, +and everything else the substrate refused (400 / 404 / 409 / 429 / ...) -> +`TransferRejectedError`. This adapter does not raise `TransferTimeoutError` +(a transport timeout folds into unreachable, since the adapter sets no await +ceiling of its own) nor `TransferIntegrityError` (a verify-on-arrival mismatch +surfaces as a `Failed` observation carrying the substrate's fatal-error text, +not an exception); both stay available for adapters that need them. + +## Not run against live Globus + +The adapter is unit-tested against a fake `TransferClient`; it has not been +exercised against a real Globus endpoint (that needs credentials and two live +collections). Treat live behaviour as unverified until a sanity run happens. +""" + +from __future__ import annotations + +import asyncio +from typing import TYPE_CHECKING, Any, Literal, Protocol + +from globus_sdk import GlobusAPIError, NetworkError, TransferData + +from cora.operation.ports.transfer_port import ( + TransferAccessDeniedError, + TransferEndpointUnreachableError, + TransferHandle, + TransferProgress, + TransferRejectedError, + TransferRequest, + TransferState, +) + +SyncLevel = Literal["exists", "size", "mtime", "checksum"] +"""Globus sync levels in ascending strictness. 
class GlobusTransferClient(Protocol):
    """The slice of `globus_sdk.TransferClient` this adapter calls.

    Owning the seam (rather than depending on the concrete client) keeps the
    adapter unit-testable with a fake and pins exactly the three methods CORA
    relies on. The real client satisfies it structurally; the TYPE_CHECKING
    assertion below fails the type-check if it ever drifts.

    All three methods are synchronous; the adapter runs them in a worker
    thread so the event loop is not blocked.
    """

    # Submit a built `TransferData` document. The adapter reads `task_id` from
    # the response and wraps it as the `TransferHandle`.
    def submit_transfer(self, data: TransferData) -> _TransferResponse: ...

    # Fetch the task document for `task_id`; the adapter maps its `status` and
    # count fields into a `TransferProgress`.
    def get_task(self, task_id: str) -> _TransferResponse: ...

    # Ask Globus to cancel `task_id`. The adapter ignores the response body.
    def cancel_task(self, task_id: str) -> _TransferResponse: ...
def _classify_api_error(
    http_status: int, code: str | None, message: str, *, endpoint: str
) -> Exception:
    """Map a Globus API error's HTTP status to a CORA transfer error.

    Pure (no Globus types), so the mapping is unit-tested directly with plain
    status codes. The exception is returned, not raised, so the caller can
    chain it with `raise ... from exc`.

    - 401 / 403 -> `TransferAccessDeniedError(endpoint)`
    - any 5xx -> `TransferEndpointUnreachableError(endpoint)`: the substrate
      failed to serve the request
    - everything else -> `TransferRejectedError(reason)`: the substrate refused
      this particular request

    `reason` is `"<code or status>: <message>"`, or just `"<code or status>"`
    when `message` is empty. `endpoint` is the context string carried by the
    access-denied and unreachable errors.
    """
    if http_status in (401, 403):
        return TransferAccessDeniedError(endpoint)
    # Match the whole 5xx class, as documented. An explicit list of
    # 500/502/503/504 would let 501, 505, 507 and similar fall through and be
    # reported as a refusal of the request rather than a failing substrate.
    if 500 <= http_status <= 599:
        return TransferEndpointUnreachableError(endpoint)
    label = code or http_status
    reason = f"{label}: {message}" if message else f"{label}"
    return TransferRejectedError(reason)
`TransferClient`. + + Construct with an already-authorized client; the adapter never builds the + authorizer. `skip_unchanged_sync_level` chooses how a `skip_unchanged` + request is realized (default `checksum`, the byte-identity guarantee). + """ + + def __init__( + self, + client: GlobusTransferClient, + *, + skip_unchanged_sync_level: SyncLevel = "checksum", + ) -> None: + self._client = client + self._skip_unchanged_sync_level: SyncLevel = skip_unchanged_sync_level + + async def begin(self, request: TransferRequest) -> TransferHandle: + source_endpoint, source_path = _split_location(request.source) + destination_endpoint, destination_path = _split_location(request.destination) + sync_level: SyncLevel | None = ( + self._skip_unchanged_sync_level if request.skip_unchanged else None + ) + data = TransferData( + source_endpoint=source_endpoint, + destination_endpoint=destination_endpoint, + label=request.label, + submission_id=request.idempotency_key, + sync_level=sync_level, + verify_checksum=request.verify_on_arrival, + ) + data.add_item(source_path, destination_path, recursive=request.recursive) + context = f"{request.source} -> {request.destination}" + response = await self._invoke(self._client.submit_transfer, data, context=context) + return TransferHandle(str(response["task_id"])) + + async def observe(self, handle: TransferHandle) -> TransferProgress: + response = await self._invoke(self._client.get_task, str(handle), context=str(handle)) + return _map_task_status(response) + + async def cancel(self, handle: TransferHandle) -> None: + await self._invoke(self._client.cancel_task, str(handle), context=str(handle)) + + async def aclose(self) -> None: + """No-op: the injected client's lifecycle is the composition root's.""" + + async def _invoke(self, call: Any, *args: Any, context: str) -> _TransferResponse: + """Run a synchronous Globus call off the event loop and map its failures.""" + try: + return await asyncio.to_thread(call, *args) + except NetworkError 
as exc: + raise TransferEndpointUnreachableError(context) from exc + except GlobusAPIError as exc: + raise _classify_api_error( + getattr(exc, "http_status", 0), + getattr(exc, "code", None), + getattr(exc, "message", "") or str(exc), + endpoint=context, + ) from exc + + +__all__ = ["GlobusTransferClient", "GlobusTransferPort", "SyncLevel"] diff --git a/apps/api/tests/unit/operation/test_globus_transfer_port.py b/apps/api/tests/unit/operation/test_globus_transfer_port.py new file mode 100644 index 00000000000..914fdf0bc78 --- /dev/null +++ b/apps/api/tests/unit/operation/test_globus_transfer_port.py @@ -0,0 +1,282 @@ +"""Unit tests for `GlobusTransferPort` against a fake Globus `TransferClient`. + +These exercise the adapter's translation both ways: a `TransferRequest` into a +Globus `TransferData` submission, and a Globus task response back into a +`TransferProgress`. The Globus client is faked, so nothing here touches a live +endpoint; live behaviour is unverified until a credentialed sanity run. 
+""" + +from typing import Any + +import pytest +import requests +from globus_sdk import NetworkError, TransferAPIError, TransferData +from requests.structures import CaseInsensitiveDict + +from cora.operation.adapters.globus_transfer_port import GlobusTransferPort +from cora.operation.ports.transfer_port import ( + TransferAccessDeniedError, + TransferEndpointUnreachableError, + TransferHandle, + TransferPort, + TransferRejectedError, + TransferRequest, + TransferState, +) + +_SOURCE = "src-endpoint:/data/dm/2BM/exp123/" +_DESTINATION = "dst-endpoint:/archive/2BM/exp123/" +_RAW_SYNC = TransferRequest(source=_SOURCE, destination=_DESTINATION, recursive=True) + + +class _FakeResponse: + """Dict-backed stand-in for a Globus HTTP response (read shape only).""" + + def __init__(self, data: dict[str, Any]) -> None: + self._data = data + + def __getitem__(self, key: str) -> Any: + return self._data[key] + + def get(self, key: str, default: Any = None) -> Any: + return self._data.get(key, default) + + +class _FakeTransferClient: + """Records what the adapter submits; replays seeded task responses / errors.""" + + def __init__( + self, + *, + task_id: str = "globus-task-1", + get_responses: list[dict[str, Any]] | None = None, + submit_error: Exception | None = None, + ) -> None: + self.submitted: list[TransferData] = [] + self.cancelled: list[str] = [] + self._task_id = task_id + self._get_responses = list(get_responses or []) + self._submit_error = submit_error + + def submit_transfer(self, data: TransferData) -> _FakeResponse: + if self._submit_error is not None: + raise self._submit_error + self.submitted.append(data) + return _FakeResponse({"task_id": self._task_id}) + + def get_task(self, task_id: str) -> _FakeResponse: + if self._get_responses: + return _FakeResponse(self._get_responses.pop(0)) + return _FakeResponse({"status": "SUCCEEDED"}) + + def cancel_task(self, task_id: str) -> _FakeResponse: + self.cancelled.append(task_id) + return _FakeResponse({"code": 
def _api_error(status: int, *, code: str = "Error", message: str = "boom") -> TransferAPIError:
    """Build a real `TransferAPIError` carrying `status` for the mapping tests.

    A `requests.Response` is assembled by hand (status code, a prepared POST
    request, a JSON body holding `code` and `message`) and handed to
    `TransferAPIError`, so the adapter's error mapping is exercised against a
    genuine SDK exception rather than a stand-in.
    """
    import json

    response = requests.Response()
    response.status_code = status

    prepared = requests.PreparedRequest()
    prepared.method = "POST"
    prepared.url = "https://transfer.api.globus.org/v0.10/transfer"
    prepared.headers = CaseInsensitiveDict()
    response.request = prepared

    # Serialize with json.dumps, not string interpolation, so a `code` or
    # `message` containing a quote or backslash still yields valid JSON. The
    # compact separators keep the bytes identical for plain values.
    body = json.dumps({"code": code, "message": message}, separators=(",", ":"))
    response._content = body.encode()
    response.headers["Content-Type"] = "application/json"
    return TransferAPIError(response)
@pytest.mark.unit
async def test_malformed_location_is_rejected_before_any_submit() -> None:
    """A source without an `endpoint:path` separator fails before Globus is called."""
    fake_client = _FakeTransferClient()
    adapter = GlobusTransferPort(fake_client)

    with pytest.raises(TransferRejectedError):
        await adapter.begin(TransferRequest(source="no-colon-here", destination=_DESTINATION))

    # Nothing was submitted: the rejection happened client-side.
    assert fake_client.submitted == []
@pytest.mark.unit
async def test_observe_partial_failed_task_carries_the_failed_count() -> None:
    """A FAILED task with some files moved and some failed reads as a partial move."""
    task_document = {
        "status": "FAILED",
        "files": 400,
        "files_transferred": 397,
        "subtasks_failed": 3,
        "fatal_error": {"code": "PERMISSION_DENIED", "description": "3 files unreadable"},
    }
    adapter = GlobusTransferPort(_FakeTransferClient(get_responses=[task_document]))

    observed = await adapter.observe(TransferHandle("globus-task-1"))

    assert observed.state is TransferState.FAILED
    assert observed.is_partial is True
    assert observed.files_failed == 3
    # The detail comes from the fatal error's description, not its code.
    assert observed.detail == "3 files unreadable"
@pytest.mark.unit
async def test_network_error_maps_to_endpoint_unreachable() -> None:
    """A Globus `NetworkError` raised on submit surfaces as endpoint-unreachable."""
    transport_failure = NetworkError("connection refused", requests.ConnectionError("down"))
    adapter = GlobusTransferPort(_FakeTransferClient(submit_error=transport_failure))

    with pytest.raises(TransferEndpointUnreachableError):
        await adapter.begin(_RAW_SYNC)
["crypto"] }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/93/a6f6686bb2beb038047abe203b48c7f995f6a96884679eedf7e9aa34300b/globus_sdk-3.65.0.tar.gz", hash = "sha256:a4b350b980809e86d768c8e327de9ddee4405b60cfb83429cdf831ac0c63d763", size = 275365, upload-time = "2025-10-03T01:56:45.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/cd/f89c1d66a678d456887d305669ad929cb3ea742be1f563899a9949bcb41f/globus_sdk-3.65.0-py3-none-any.whl", hash = "sha256:d14154c3a40bb6c4d6a77e7200234d43358bd1daca9224841d4297f0edea80e6", size = 418025, upload-time = "2025-10-03T01:56:43.495Z" }, +] + [[package]] name = "googleapis-common-protos" version = "1.75.0" From b8db41ebb6d6ce2ec85ce83e7d8e7477166ec993 Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Thu, 25 Jun 2026 20:26:10 +0300 Subject: [PATCH 3/7] feat(operation): FdtTransferPort inner-leg mover (triage spike) The acquisition-to-analysis stage-in substrate for stage-then-reconstruct, the sibling of GlobusTransferPort (which serves the outer user-delivery leg). It runs an APS Fast Data Transfer (fdt.jar) client as a subprocess via an injected TransferRunner and maps the exit code into a TransferState. Integrity and sync are deferred on purpose: checksum-on-arrival is recorded as an Attestation in the materialize-a-Distribution edge job that consumes this port, and general sync belongs to richer substrates (Globus). Progress is coarse (a subprocess exposes no per-file counters). Grounded in the real 2-BM pipeline (2bm-docs ops/item_018 + item_025): the per-scan tomdet:/local1 -> /data2 copy via fdt.jar or scp is exactly the leg reconstruction on the analysis nodes waits on. Unit-tested against a fake runner; not run against a real fdt.jar. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../operation/adapters/fdt_transfer_port.py | 174 ++++++++++++++++++ .../unit/operation/test_fdt_transfer_port.py | 171 +++++++++++++++++ 2 files changed, 345 insertions(+) create mode 100644 apps/api/src/cora/operation/adapters/fdt_transfer_port.py create mode 100644 apps/api/tests/unit/operation/test_fdt_transfer_port.py diff --git a/apps/api/src/cora/operation/adapters/fdt_transfer_port.py b/apps/api/src/cora/operation/adapters/fdt_transfer_port.py new file mode 100644 index 00000000000..9dcf8016302 --- /dev/null +++ b/apps/api/src/cora/operation/adapters/fdt_transfer_port.py @@ -0,0 +1,174 @@ +"""FdtTransferPort: TransferPort over a transfer-client subprocess (FDT / scp). + +The inner-leg substrate for CORA's stage-then-reconstruct: it moves a dataset's +bytes from the acquisition host to the analysis tier by running a transfer +client as a subprocess (APS Fast Data Transfer, `fdt.jar`, or scp). It is the +sibling of `GlobusTransferPort`, which serves the outer user-delivery leg; this +one serves the per-scan acquisition-to-analysis copy that gates reconstruction +(the move the 2-BM pipeline does today via `fdt.jar` or scp, tomdet:/local1 -> +/data2). + +The transfer client is invoked through an injected `TransferRunner` so the +adapter is unit-testable without a real subprocess; the production runner +(`SubprocessTransferRunner`) launches and polls a real child process. + +## What it does NOT do + +Integrity is not established here. Per the CORA-lens design, checksum-on-arrival +is recorded as an `Attestation` (via the Data BC `ChecksumVerifier`) in the +materialize-a-Distribution edge job that consumes this port, not inside the +mover. So `verify_on_arrival` and `skip_unchanged` are not honored by this +adapter (FDT/scp offer no general sync); they are deferred to that edge job and +to richer substrates (Globus) respectively. 
class TransferRunner(Protocol):
    """Launch / poll / terminate a transfer-client subprocess.

    Injected so `FdtTransferPort` is testable with a fake. `start` returns an
    opaque token; `poll` returns the process exit code, or None while it is
    still running; `terminate` best-effort stops a running process.
    """

    # Launch the client with the given argv and return a token identifying it.
    # `FdtTransferPort` wraps that token as the move's `TransferHandle` and
    # treats an `OSError` raised here as an unreachable endpoint.
    async def start(self, argv: tuple[str, ...]) -> str: ...

    # Exit code of the process behind `token`, or None while it is still running.
    async def poll(self, token: str) -> int | None: ...

    # Best-effort stop of the process behind `token`.
    async def terminate(self, token: str) -> None: ...
@dataclass
class FdtTransferPort:
    """`TransferPort` that moves bytes by running a transfer-client subprocess.

    `fdt_jar` and `java` locate the Fast Data Transfer client. A deployment that
    moves via scp would supply a sibling adapter at the rule-of-three trigger;
    this one builds an `fdt.jar` client invocation. See the module docstring for
    what this adapter deliberately does not do.

    The adapter keeps one `_MoveState` per begun handle; the process itself is
    owned by the injected `runner` and addressed by the handle's string form.
    """

    runner: TransferRunner
    fdt_jar: str = "/APSshare/bin/fdt.jar"
    java: str = "java"
    _moves: dict[TransferHandle, _MoveState] = field(
        default_factory=dict[TransferHandle, _MoveState]
    )

    async def begin(self, request: TransferRequest) -> TransferHandle:
        """Launch the FDT client for `request` and return a handle for the move.

        The invocation is `java -jar <fdt_jar> -c <dest host> -d <dest dir>
        [-r] <source path>`. The source location is split like the destination,
        but its host part is discarded: only the source path reaches the
        command line. An `OSError` from the runner is re-raised as
        `TransferEndpointUnreachableError` carrying the destination.
        """
        _, source_path = _split_location(request.source)
        dest_host, dest_dir = _split_location(request.destination)

        command = [self.java, "-jar", self.fdt_jar, "-c", dest_host, "-d", dest_dir]
        if request.recursive:
            command.append("-r")
        command.append(source_path)

        try:
            token = await self.runner.start(tuple(command))
        except OSError as exc:
            raise TransferEndpointUnreachableError(request.destination) from exc

        handle = TransferHandle(token)
        self._moves[handle] = _MoveState()
        return handle

    async def observe(self, handle: TransferHandle) -> TransferProgress:
        """Report the move's lifecycle state from the process exit code.

        A cancelled move reports `Cancelled` without polling the runner.
        Otherwise no exit code yet means `Active`, exit code 0 means
        `Succeeded`, and any other code means `Failed` with the code in
        `detail`. Counts are left at their defaults. A handle this adapter did
        not begin raises `KeyError`.
        """
        if self._moves[handle].cancelled:
            return TransferProgress(state=TransferState.CANCELLED)

        exit_code = await self.runner.poll(str(handle))
        if exit_code is None:
            return TransferProgress(state=TransferState.ACTIVE)
        if exit_code != 0:
            return TransferProgress(
                state=TransferState.FAILED,
                detail=f"transfer client exited with code {exit_code}",
            )
        return TransferProgress(state=TransferState.SUCCEEDED)

    async def cancel(self, handle: TransferHandle) -> None:
        """Terminate a still-running move and mark it cancelled.

        A move whose process has already exited is left untouched, so its
        recorded outcome keeps being reported. The cancelled flag is set only
        after the runner's `terminate` returns.
        """
        move = self._moves[handle]
        token = str(handle)
        if await self.runner.poll(token) is None:
            await self.runner.terminate(token)
            move.cancelled = True

    async def aclose(self) -> None:
        """Best-effort terminate every move this adapter has begun.

        Failures from the runner are suppressed so one bad handle does not stop
        the rest from being terminated.
        """
        for handle in tuple(self._moves):
            with contextlib.suppress(Exception):
                await self.runner.terminate(str(handle))
+ """ + host, separator, path = location.partition(":") + if not separator or not host or not path: + msg = f"location {location!r} is not in 'host:path' form" + raise TransferRejectedError(msg) + return host, path + + +__all__ = ["FdtTransferPort", "SubprocessTransferRunner", "TransferRunner"] diff --git a/apps/api/tests/unit/operation/test_fdt_transfer_port.py b/apps/api/tests/unit/operation/test_fdt_transfer_port.py new file mode 100644 index 00000000000..a9f04f49bcc --- /dev/null +++ b/apps/api/tests/unit/operation/test_fdt_transfer_port.py @@ -0,0 +1,171 @@ +"""Unit tests for `FdtTransferPort` against a fake transfer runner. + +The adapter builds an `fdt.jar` client invocation from a `TransferRequest` and +maps the subprocess exit code into a `TransferState`. The runner is faked, so +nothing here launches a real process; live FDT behaviour is unverified. +""" + +import pytest + +from cora.operation.adapters.fdt_transfer_port import FdtTransferPort +from cora.operation.ports.transfer_port import ( + TransferEndpointUnreachableError, + TransferPort, + TransferRejectedError, + TransferRequest, + TransferState, +) + +_SOURCE = "tomdet:/local1/2BM/2026-06/scan_001.h5" +_DESTINATION = "tomo1:/data2/2BM/2026-06" +_STAGE_IN = TransferRequest(source=_SOURCE, destination=_DESTINATION) + + +class _FakeRunner: + """Records launched argv; replays a seeded exit-code sequence per move.""" + + def __init__(self) -> None: + self.started_argv: list[tuple[str, ...]] = [] + self.terminated: list[str] = [] + self._start_error: Exception | None = None + self._sequences: list[list[int | None]] = [] + self._poll_state: dict[str, list[int | None]] = {} + self._counter = 0 + + def set_start_error(self, error: Exception) -> None: + self._start_error = error + + def set_next_exit_sequence(self, sequence: list[int | None]) -> None: + """Seed the exit-code sequence the next started move reports (one per poll). 
+ + Each `poll` returns the next element, clamping on the last; None means + still running. With nothing seeded a move polls as exit code 0 (success). + """ + self._sequences.append(list(sequence)) + + async def start(self, argv: tuple[str, ...]) -> str: + if self._start_error is not None: + raise self._start_error + self.started_argv.append(argv) + self._counter += 1 + token = f"fake-{self._counter}" + self._poll_state[token] = self._sequences.pop(0) if self._sequences else [0] + return token + + async def poll(self, token: str) -> int | None: + sequence = self._poll_state[token] + code = sequence[0] + if len(sequence) > 1: + sequence.pop(0) + return code + + async def terminate(self, token: str) -> None: + self.terminated.append(token) + + +@pytest.mark.unit +async def test_begin_builds_an_fdt_client_invocation_for_a_single_file() -> None: + runner = _FakeRunner() + port = FdtTransferPort(runner) + await port.begin(_STAGE_IN) + assert runner.started_argv[0] == ( + "java", + "-jar", + "/APSshare/bin/fdt.jar", + "-c", + "tomo1", + "-d", + "/data2/2BM/2026-06", + "/local1/2BM/2026-06/scan_001.h5", + ) + + +@pytest.mark.unit +async def test_begin_adds_the_recursive_flag_for_a_directory_move() -> None: + runner = _FakeRunner() + port = FdtTransferPort(runner) + await port.begin( + TransferRequest(source="tomdet:/local1/exp", destination="tomo1:/data2/exp", recursive=True) + ) + assert "-r" in runner.started_argv[0] + + +@pytest.mark.unit +async def test_observe_maps_running_then_zero_exit_to_active_then_succeeded() -> None: + runner = _FakeRunner() + runner.set_next_exit_sequence([None, 0]) + port = FdtTransferPort(runner) + handle = await port.begin(_STAGE_IN) + assert (await port.observe(handle)).state is TransferState.ACTIVE + assert (await port.observe(handle)).state is TransferState.SUCCEEDED + + +@pytest.mark.unit +async def test_observe_maps_a_nonzero_exit_to_failed_with_the_code_in_detail() -> None: + runner = _FakeRunner() + runner.set_next_exit_sequence([3]) 
+ port = FdtTransferPort(runner) + handle = await port.begin(_STAGE_IN) + progress = await port.observe(handle) + assert progress.state is TransferState.FAILED + assert "3" in (progress.detail or "") + + +@pytest.mark.unit +async def test_cancel_of_a_running_move_makes_it_observe_as_cancelled() -> None: + runner = _FakeRunner() + runner.set_next_exit_sequence([None]) + port = FdtTransferPort(runner) + handle = await port.begin(_STAGE_IN) + assert (await port.observe(handle)).state is TransferState.ACTIVE + + await port.cancel(handle) + assert (await port.observe(handle)).state is TransferState.CANCELLED + assert runner.terminated == [str(handle)] + + +@pytest.mark.unit +async def test_cancel_of_an_already_finished_move_is_a_noop() -> None: + runner = _FakeRunner() + runner.set_next_exit_sequence([0]) + port = FdtTransferPort(runner) + handle = await port.begin(_STAGE_IN) + assert (await port.observe(handle)).state is TransferState.SUCCEEDED + + await port.cancel(handle) + assert (await port.observe(handle)).state is TransferState.SUCCEEDED + assert runner.terminated == [] + + +@pytest.mark.unit +async def test_malformed_location_is_rejected_before_launching_anything() -> None: + runner = _FakeRunner() + port = FdtTransferPort(runner) + with pytest.raises(TransferRejectedError): + await port.begin(TransferRequest(source="no-colon", destination=_DESTINATION)) + assert runner.started_argv == [] + + +@pytest.mark.unit +async def test_a_failure_to_launch_maps_to_endpoint_unreachable() -> None: + runner = _FakeRunner() + runner.set_start_error(FileNotFoundError("java not found")) + port = FdtTransferPort(runner) + with pytest.raises(TransferEndpointUnreachableError): + await port.begin(_STAGE_IN) + + +@pytest.mark.unit +async def test_adapter_satisfies_the_transfer_port_protocol() -> None: + port = FdtTransferPort(_FakeRunner()) + assert isinstance(port, TransferPort) + + +@pytest.mark.unit +async def test_aclose_terminates_outstanding_moves() -> None: + runner = 
_FakeRunner() + runner.set_next_exit_sequence([None]) + port = FdtTransferPort(runner) + handle = await port.begin(_STAGE_IN) + await port.aclose() + assert str(handle) in runner.terminated From 683f70bd66f6663ade3cacf282ec07ea06afc71b Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Fri, 26 Jun 2026 00:19:42 +0300 Subject: [PATCH 4/7] feat(api): DistributionMaterializer edge-job, leg B of stage-then-reconstruct (triage spike) Sequences the materialize-a-Distribution flow: move bytes over a TransferPort, then on success register a new analysis-tier Distribution of the same raw Dataset (register_distribution) and record a checksum Attestation (record_attestation), whose Match flips the Distribution to Verified in the Data BC projection. That Verified-at-tier fact is exactly what leg C's start_run gate reads. It owns the sequence and the transfer gate (registers only on a Succeeded move, waits through a non-terminal Suspended); it trusts the caller's RegisterDistribution (byte-identical-copy fields built from the parent Dataset). Lives in cora.api, the only module that may reach both operation.ports and the Data BC handlers; injected collaborators, unit-tested against the in-memory TransferPort plus fake handlers. Not yet wired into the EdgeConductor or the app (deferred to the real build, post gate-review of leg C). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../cora/api/_distribution_materializer.py | 176 +++++++++++++++++ .../api/test_distribution_materializer.py | 186 ++++++++++++++++++ 2 files changed, 362 insertions(+) create mode 100644 apps/api/src/cora/api/_distribution_materializer.py create mode 100644 apps/api/tests/unit/api/test_distribution_materializer.py diff --git a/apps/api/src/cora/api/_distribution_materializer.py b/apps/api/src/cora/api/_distribution_materializer.py new file mode 100644 index 00000000000..8047afd445a --- /dev/null +++ b/apps/api/src/cora/api/_distribution_materializer.py @@ -0,0 +1,176 @@ +"""DistributionMaterializer: leg B of stage-then-reconstruct. + +Sequences the three acts that turn "bytes copied to another tier" into a fact +CORA trusts: (1) move the bytes over a `TransferPort`, (2) on success record a +new `Distribution` of the SAME raw Dataset at the analysis-tier Storage Supply +(`register_distribution`), (3) record a checksum `Attestation` over the landed +bytes (`record_attestation`), whose Match flips the Distribution to Verified in +the Data BC projection. That Verified-at-tier fact is exactly what leg C's +start_run gate reads ([[project_run_input_dependency_design]]). + +This is an orchestration concern, so it lives in `cora.api` (the only module +that may reach both `cora.operation.ports` and the Data BC handlers). It is the +materialize edge job the EdgeConductor will drive; here it is a self-contained +unit with injected collaborators, exercised against fakes. + +## What it owns vs trusts + +It OWNS the sequence and the transfer gate: it begins the transfer, observes to +a terminal (waiting through a non-terminal `Suspended`), and registers ONLY if +the transfer Succeeded. It TRUSTS the caller's `RegisterDistribution`: the +caller (which holds the parent Dataset) builds it with the byte-identical-copy +fields (checksum / byte_size / media_type equal to the parent Dataset); the +Data BC decider enforces that equality. 
The `RecordAttestation` is built here +from the registration's `dataset_id` plus the new Distribution id. + +## Eventual-consistency note + +`record_attestation` returns an attestation id for Match, Mismatch, AND +Unreachable; the Distribution's flip to Verified (Match) or Stale (Mismatch) is +a projection-only update the Data BC applies from `AttestationRecorded`. So a +successful `materialize` means "moved + registered + attested", not "Verified"; +the Verified status is the projection's eventual flip, which leg C's gate reads +later. This mirrors the safety-envelope eventual-consistency window. + +## Deferred + +A long-running continuous sync would complete via a later signal rather than a +synchronous observe-loop; this unit polls to a terminal with a runaway bound. +Real-adapter poll backoff and the signal-driven variant are deferred. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from cora.data.features.record_attestation.command import RecordAttestation +from cora.infrastructure.routing import NIL_SENTINEL_ID +from cora.operation.ports.transfer_port import TransferState + +if TYPE_CHECKING: + from uuid import UUID + + from cora.data.features.record_attestation.handler import Handler as RecordAttestationHandler + from cora.data.features.register_distribution.command import RegisterDistribution + from cora.data.features.register_distribution.handler import ( + Handler as RegisterDistributionHandler, + ) + from cora.operation.ports.transfer_port import ( + TransferHandle, + TransferPort, + TransferProgress, + TransferRequest, + ) + +_MAX_OBSERVATIONS = 10_000 +"""Runaway bound on the observe-loop. A real transfer reaches a terminal in far +fewer polls; this is a backstop against a substrate that never terminates, not a +tuning knob. 
Exceeding it raises rather than spinning forever.""" + +_CHECKSUM_VERIFIED_KIND = "ChecksumVerified" +"""The AttestationKind wire value the materialize edge job requests. Mirrored as +a literal because `cora.api` may not import `cora.data.aggregates` (tach); the +record_attestation handler re-validates it against the closed AttestationKind.""" + + +@dataclass(frozen=True) +class MaterializationOutcome: + """The result of one materialize: the transfer terminal plus what it recorded. + + `distribution_id` and `attestation_id` are None when the transfer did not + Succeed (nothing is registered off an incomplete move). `materialized` is the + one-call success predicate. `transfer_state` and `transfer_detail` carry the + terminal observation so a caller can record why a non-success move stopped. + """ + + transfer_state: TransferState + distribution_id: UUID | None = None + attestation_id: UUID | None = None + transfer_detail: str | None = None + + @property + def materialized(self) -> bool: + """True iff the move Succeeded and a Distribution was registered.""" + return self.distribution_id is not None + + +@dataclass +class DistributionMaterializer: + """Drives transfer -> register_distribution -> record_attestation. + + Construct with a `TransferPort` and the two Data BC handlers (the bare + `Handler` protocols `register_distribution.bind` / `record_attestation.bind` + return); call `materialize` per move. See the module docstring for what it + owns vs trusts. + """ + + transfer_port: TransferPort + register_distribution: RegisterDistributionHandler + record_attestation: RecordAttestationHandler + + async def materialize( + self, + transfer: TransferRequest, + registration: RegisterDistribution, + *, + principal_id: UUID, + correlation_id: UUID, + causation_id: UUID | None = None, + surface_id: UUID = NIL_SENTINEL_ID, + ) -> MaterializationOutcome: + """Move the bytes, then (only on success) register + attest the copy. + + Returns a `MaterializationOutcome`. 
A non-Succeeded transfer terminal + short-circuits with no Distribution registered. Transfer-port errors and + Data BC domain errors propagate to the caller unchanged. + """ + handle = await self.transfer_port.begin(transfer) + progress = await self._observe_to_terminal(handle) + if progress.state is not TransferState.SUCCEEDED: + return MaterializationOutcome( + transfer_state=progress.state, transfer_detail=progress.detail + ) + + distribution_id = await self.register_distribution( + registration, + principal_id=principal_id, + correlation_id=correlation_id, + causation_id=causation_id, + surface_id=surface_id, + ) + attestation_id = await self.record_attestation( + RecordAttestation( + dataset_id=registration.dataset_id, + distribution_id=distribution_id, + kind=_CHECKSUM_VERIFIED_KIND, + ), + principal_id=principal_id, + correlation_id=correlation_id, + causation_id=causation_id, + surface_id=surface_id, + ) + return MaterializationOutcome( + transfer_state=TransferState.SUCCEEDED, + distribution_id=distribution_id, + attestation_id=attestation_id, + transfer_detail=progress.detail, + ) + + async def _observe_to_terminal(self, handle: TransferHandle) -> TransferProgress: + """Poll `observe` until a terminal, waiting through a non-terminal Suspended. + + Bounded by `_MAX_OBSERVATIONS` as a runaway backstop; a real long-running + transfer would terminalize via a later signal instead (deferred). 
+ """ + for _ in range(_MAX_OBSERVATIONS): + progress = await self.transfer_port.observe(handle) + if progress.state.is_terminal: + return progress + msg = ( + f"transfer {handle!r} did not reach a terminal within {_MAX_OBSERVATIONS} observations" + ) + raise RuntimeError(msg) + + +__all__ = ["DistributionMaterializer", "MaterializationOutcome"] diff --git a/apps/api/tests/unit/api/test_distribution_materializer.py b/apps/api/tests/unit/api/test_distribution_materializer.py new file mode 100644 index 00000000000..de0a0afa009 --- /dev/null +++ b/apps/api/tests/unit/api/test_distribution_materializer.py @@ -0,0 +1,186 @@ +"""Unit tests for `DistributionMaterializer` (leg B of stage-then-reconstruct). + +Exercises the sequence transfer -> register_distribution -> record_attestation +against the in-memory TransferPort double and fake Data BC handlers: nothing +here stands up a Kernel or touches an event store. +""" + +from uuid import UUID, uuid4 + +import pytest + +from cora.api._distribution_materializer import DistributionMaterializer +from cora.data.features.record_attestation.command import RecordAttestation +from cora.data.features.register_distribution.command import RegisterDistribution +from cora.infrastructure.routing import NIL_SENTINEL_ID +from cora.operation.adapters.in_memory_transfer_port import InMemoryTransferPort +from cora.operation.ports.transfer_port import TransferProgress, TransferRequest, TransferState + +_DATASET_ID = uuid4() +_SUPPLY_ID = uuid4() +_DISTRIBUTION_ID = uuid4() +_ATTESTATION_ID = uuid4() +_PRINCIPAL_ID = uuid4() +_CORRELATION_ID = uuid4() + +_TRANSFER = TransferRequest( + source="tomdet:/local1/2BM/scan_001.h5", destination="tomo1:/data2/2BM/scan_001.h5" +) +_REGISTRATION = RegisterDistribution( + dataset_id=_DATASET_ID, + supply_id=_SUPPLY_ID, + uri="file:///data2/2BM/scan_001.h5", + checksum_algorithm="sha256", + checksum_value="a" * 64, + byte_size=4096, + media_type="application/x-hdf5", + access_protocol="POSIX", +) + + 
+class _FakeRegisterHandler: + """Records each RegisterDistribution call; returns a fixed distribution id.""" + + def __init__(self, distribution_id: UUID) -> None: + self._distribution_id = distribution_id + self.commands: list[RegisterDistribution] = [] + self.principals: list[UUID] = [] + self.correlations: list[UUID] = [] + + async def __call__( + self, + command: RegisterDistribution, + *, + principal_id: UUID, + correlation_id: UUID, + causation_id: UUID | None = None, + surface_id: UUID = NIL_SENTINEL_ID, + ) -> UUID: + self.commands.append(command) + self.principals.append(principal_id) + self.correlations.append(correlation_id) + return self._distribution_id + + +class _FakeAttestHandler: + """Records each RecordAttestation call; returns a fixed attestation id.""" + + def __init__(self, attestation_id: UUID) -> None: + self._attestation_id = attestation_id + self.commands: list[RecordAttestation] = [] + + async def __call__( + self, + command: RecordAttestation, + *, + principal_id: UUID, + correlation_id: UUID, + causation_id: UUID | None = None, + surface_id: UUID = NIL_SENTINEL_ID, + ) -> UUID: + self.commands.append(command) + return self._attestation_id + + +def _materializer( + port: InMemoryTransferPort, register: _FakeRegisterHandler, attest: _FakeAttestHandler +) -> DistributionMaterializer: + return DistributionMaterializer( + transfer_port=port, register_distribution=register, record_attestation=attest + ) + + +async def _materialize( + port: InMemoryTransferPort, register: _FakeRegisterHandler, attest: _FakeAttestHandler +): + return await _materializer(port, register, attest).materialize( + _TRANSFER, + _REGISTRATION, + principal_id=_PRINCIPAL_ID, + correlation_id=_CORRELATION_ID, + ) + + +@pytest.mark.unit +async def test_successful_move_registers_then_attests_and_reports_materialized() -> None: + port = InMemoryTransferPort() + port.set_next_terminal(TransferState.SUCCEEDED) + register = _FakeRegisterHandler(_DISTRIBUTION_ID) + attest = 
_FakeAttestHandler(_ATTESTATION_ID) + + outcome = await _materialize(port, register, attest) + + assert outcome.materialized is True + assert outcome.transfer_state is TransferState.SUCCEEDED + assert outcome.distribution_id == _DISTRIBUTION_ID + assert outcome.attestation_id == _ATTESTATION_ID + assert register.commands == [_REGISTRATION] + + +@pytest.mark.unit +async def test_attestation_is_built_from_the_dataset_and_new_distribution_id() -> None: + port = InMemoryTransferPort() + port.set_next_terminal(TransferState.SUCCEEDED) + register = _FakeRegisterHandler(_DISTRIBUTION_ID) + attest = _FakeAttestHandler(_ATTESTATION_ID) + + await _materialize(port, register, attest) + + assert attest.commands == [ + RecordAttestation( + dataset_id=_DATASET_ID, + distribution_id=_DISTRIBUTION_ID, + kind="ChecksumVerified", + ) + ] + + +@pytest.mark.unit +async def test_failed_move_skips_register_and_attest() -> None: + port = InMemoryTransferPort() + port.set_next_terminal(TransferState.FAILED, files_failed=3, detail="3 files unreadable") + register = _FakeRegisterHandler(_DISTRIBUTION_ID) + attest = _FakeAttestHandler(_ATTESTATION_ID) + + outcome = await _materialize(port, register, attest) + + assert outcome.materialized is False + assert outcome.transfer_state is TransferState.FAILED + assert outcome.distribution_id is None + assert outcome.attestation_id is None + assert outcome.transfer_detail == "3 files unreadable" + assert register.commands == [] + assert attest.commands == [] + + +@pytest.mark.unit +async def test_move_waits_through_a_non_terminal_suspended_then_materializes() -> None: + port = InMemoryTransferPort() + port.set_next_progression( + ( + TransferProgress(state=TransferState.ACTIVE), + TransferProgress(state=TransferState.SUSPENDED, detail="credential expired"), + TransferProgress(state=TransferState.ACTIVE), + TransferProgress(state=TransferState.SUCCEEDED), + ) + ) + register = _FakeRegisterHandler(_DISTRIBUTION_ID) + attest = 
_FakeAttestHandler(_ATTESTATION_ID) + + outcome = await _materialize(port, register, attest) + + assert outcome.materialized is True + assert len(register.commands) == 1 + + +@pytest.mark.unit +async def test_threads_principal_and_correlation_into_the_handlers() -> None: + port = InMemoryTransferPort() + port.set_next_terminal(TransferState.SUCCEEDED) + register = _FakeRegisterHandler(_DISTRIBUTION_ID) + attest = _FakeAttestHandler(_ATTESTATION_ID) + + await _materialize(port, register, attest) + + assert register.principals == [_PRINCIPAL_ID] + assert register.correlations == [_CORRELATION_ID] From f2309e90910ee8975348f25fc8d75e57a2fbe3a2 Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Fri, 26 Jun 2026 00:48:30 +0300 Subject: [PATCH 5/7] feat(infrastructure): DatasetDistributionLookup port, leg C sub-slice 1 (triage spike) The cross-BC seam the start_run input gate (leg C of stage-then-reconstruct) will read: given an input Dataset, return its non-Discarded Distributions with status, so the decider can gate on Verified AND distinguish Stale from absent. Mirrors SupplyLookup (one Data-BC adapter, multiple consumers; the port returns rows, the decider partitions on status); lives in cora.infrastructure.ports because Run may not import the Data-internal DistributionLookup (the Edition canonical-pick query, a different need). Ships the Protocol + result DTO + two test stubs; no consumer yet. The Method input-role declaration, the Run input-Dataset binding, and the start_run genesis Verified-gate (plus the Data BC Postgres adapter) are the following sub-slices; the gate sub-slice is where gate-review-before-merge bites. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ports/dataset_distribution_lookup.py | 93 +++++++++++++++++++ .../test_dataset_distribution_lookup.py | 34 +++++++ 2 files changed, 127 insertions(+) create mode 100644 apps/api/src/cora/infrastructure/ports/dataset_distribution_lookup.py create mode 100644 apps/api/tests/unit/infrastructure/test_dataset_distribution_lookup.py diff --git a/apps/api/src/cora/infrastructure/ports/dataset_distribution_lookup.py b/apps/api/src/cora/infrastructure/ports/dataset_distribution_lookup.py new file mode 100644 index 00000000000..7fb5f4ddbe1 --- /dev/null +++ b/apps/api/src/cora/infrastructure/ports/dataset_distribution_lookup.py @@ -0,0 +1,93 @@ +"""DatasetDistributionLookup port: cross-BC query for a Dataset's Distributions. + +Used by the Run BC start_run gate (leg C of stage-then-reconstruct) to check +that a reconstruction's input Dataset has a Verified Distribution before the Run +may start ([[project_run_input_dependency_design]]). Cross-BC mirror of +`SupplyLookup` / `ClearanceLookup`: one implementor (Data BC ships the Postgres +adapter reading `proj_data_distribution_summary`), multiple consumers (the Run +start gate first). It lives in `cora.infrastructure.ports` because Run may not +import the Data-internal `cora.data.ports.DistributionLookup` (that one is the +Edition-shaped lowest-id canonical pick, a different need). + +## Decider-gates, not port-gates + +Returns EVERY non-Discarded Distribution for the Dataset regardless of status, +so the start_run decider can both gate on Verified AND produce a useful +diagnostic ("the input has a Distribution but it is Stale" vs "no Distribution +at all"). This is the `SupplyLookup` posture: the port returns rows, the decider +partitions on `status`. It deliberately does NOT reuse the canonical-pick query, +whose lowest-id row may be Stale while a higher-id Distribution is Verified. 
+ +`status` is the `DistributionStatus` value as a plain string (matches the +projection's TEXT column); `supply_id` is carried for the deferred reachability +check (which Storage Supply / tier the copy rests on); `distribution_id` is +carried for diagnostics and the eventual lineage record. +""" + +from dataclasses import dataclass +from typing import Protocol +from uuid import UUID + + +@dataclass(frozen=True) +class DatasetDistributionLookupResult: + """A non-Discarded Distribution of a Dataset, for the Run-start input gate.""" + + distribution_id: UUID + dataset_id: UUID + supply_id: UUID + status: str + + +class DatasetDistributionLookup(Protocol): + """Cross-BC port: query a Dataset's non-Discarded Distributions from the Run BC.""" + + async def find_by_dataset( + self, dataset_id: UUID + ) -> tuple[DatasetDistributionLookupResult, ...]: + """Return every non-Discarded Distribution for `dataset_id` (any status). + + Empty tuple when the Dataset has no non-Discarded Distribution. The + decider gates on `status == "Verified"`; the port does not filter on + status so the decider can distinguish Stale from absent. + """ + ... + + +class NoDatasetDistributionsLookup: + """Test stub: every Dataset has no Distribution (the not-present gate path). + + The conservative default for tests that do not seed the input gate: the + start_run decider sees an input with no Verified Distribution and raises. + """ + + async def find_by_dataset( + self, dataset_id: UUID + ) -> tuple[DatasetDistributionLookupResult, ...]: + _ = dataset_id + return () + + +class SeededDatasetDistributionLookup: + """Test stub: returns the Distributions configured per Dataset id. + + Construct with a mapping `{dataset_id: (result, ...)}`; an unmapped Dataset + returns an empty tuple (absent). Lets a gate test seed a Verified row, a + Stale-only row, or no row to exercise each decider branch. 
+ """ + + def __init__(self, by_dataset: dict[UUID, tuple[DatasetDistributionLookupResult, ...]]) -> None: + self._by_dataset = dict(by_dataset) + + async def find_by_dataset( + self, dataset_id: UUID + ) -> tuple[DatasetDistributionLookupResult, ...]: + return self._by_dataset.get(dataset_id, ()) + + +__all__ = [ + "DatasetDistributionLookup", + "DatasetDistributionLookupResult", + "NoDatasetDistributionsLookup", + "SeededDatasetDistributionLookup", +] diff --git a/apps/api/tests/unit/infrastructure/test_dataset_distribution_lookup.py b/apps/api/tests/unit/infrastructure/test_dataset_distribution_lookup.py new file mode 100644 index 00000000000..787b35b607c --- /dev/null +++ b/apps/api/tests/unit/infrastructure/test_dataset_distribution_lookup.py @@ -0,0 +1,34 @@ +"""Behavioural tests for the `DatasetDistributionLookup` test stubs (leg C, C1).""" + +from uuid import uuid4 + +import pytest + +from cora.infrastructure.ports.dataset_distribution_lookup import ( + DatasetDistributionLookupResult, + NoDatasetDistributionsLookup, + SeededDatasetDistributionLookup, +) + +_DATASET_ID = uuid4() + + +@pytest.mark.unit +async def test_no_distributions_stub_returns_empty_for_any_dataset() -> None: + lookup = NoDatasetDistributionsLookup() + assert await lookup.find_by_dataset(_DATASET_ID) == () + + +@pytest.mark.unit +async def test_seeded_stub_returns_the_configured_rows_for_a_dataset() -> None: + verified = DatasetDistributionLookupResult( + distribution_id=uuid4(), dataset_id=_DATASET_ID, supply_id=uuid4(), status="Verified" + ) + lookup = SeededDatasetDistributionLookup({_DATASET_ID: (verified,)}) + assert await lookup.find_by_dataset(_DATASET_ID) == (verified,) + + +@pytest.mark.unit +async def test_seeded_stub_returns_empty_for_an_unmapped_dataset() -> None: + lookup = SeededDatasetDistributionLookup({}) + assert await lookup.find_by_dataset(uuid4()) == () From e1bcc0002d8b6dbbec63506b9116888e5998bb5b Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Fri, 26 Jun 2026 
01:42:54 +0300 Subject: [PATCH 6/7] feat(recipe): Method declares needed_input_kinds, leg C sub-slice 2 (triage spike) Adds an optional additive frozenset[str] field to the Method aggregate: the kinds/roles of input Datasets a reconstruction consumes (raw-projections, flat-field, prior-reconstruction). The recipe declares the input need, the way it already declares needed_supplies / needed_family_ids; a per-Run binding will resolve the concrete input Dataset (sub-slice C3), and the start_run gate reads its Verified Distribution (C4). Mirrors needed_supplies across state / event / command / decider / evolver (all seven evolver arms carry it), with one deliberate difference: it participates in the Method content hash CONDITIONALLY (rendered only when non-empty, like role_kind) so existing Methods' content_hash bytes stay byte-stable. Optional-by-default, so a Method declaring no input kinds is byte-identical to before and no existing define_method caller, scenario, or content-hash golden changes. New error InvalidMethodNeededInputKindsError registered as a 400. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../cora/recipe/aggregates/method/__init__.py | 4 + .../cora/recipe/aggregates/method/events.py | 19 + .../cora/recipe/aggregates/method/evolver.py | 41 +- .../cora/recipe/aggregates/method/state.py | 59 ++- .../recipe/features/define_method/command.py | 9 + .../recipe/features/define_method/decider.py | 17 + apps/api/src/cora/recipe/routes.py | 4 +- .../unit/recipe/test_define_method_handler.py | 2 + .../tests/unit/recipe/test_method_events.py | 3 + .../tests/unit/recipe/test_method_evolver.py | 106 ++++ .../recipe/test_method_needed_input_kinds.py | 451 ++++++++++++++++++ 11 files changed, 710 insertions(+), 5 deletions(-) create mode 100644 apps/api/tests/unit/recipe/test_method_needed_input_kinds.py diff --git a/apps/api/src/cora/recipe/aggregates/method/__init__.py b/apps/api/src/cora/recipe/aggregates/method/__init__.py index ef944c1b716..1dc50af5ab5 100644 --- a/apps/api/src/cora/recipe/aggregates/method/__init__.py +++ b/apps/api/src/cora/recipe/aggregates/method/__init__.py @@ -46,6 +46,7 @@ from cora.recipe.aggregates.method.state import ( ITERATIVE_STOPPING_KEYS, METHOD_NAME_MAX_LENGTH, + METHOD_NEEDED_INPUT_KIND_MAX_LENGTH, METHOD_NEEDED_SUPPLY_KIND_MAX_LENGTH, METHOD_VERSION_TAG_MAX_LENGTH, ROLE_NAME_MAX_LENGTH, @@ -54,6 +55,7 @@ InvalidMethodIterativeStoppingFieldError, InvalidMethodMonotoneQualityError, InvalidMethodNameError, + InvalidMethodNeededInputKindsError, InvalidMethodNeededSuppliesError, InvalidMethodVersionTagError, InvalidPortRequirementError, @@ -83,6 +85,7 @@ __all__ = [ "ITERATIVE_STOPPING_KEYS", "METHOD_NAME_MAX_LENGTH", + "METHOD_NEEDED_INPUT_KIND_MAX_LENGTH", "METHOD_NEEDED_SUPPLY_KIND_MAX_LENGTH", "METHOD_VERSION_TAG_MAX_LENGTH", "ROLE_NAME_MAX_LENGTH", @@ -94,6 +97,7 @@ "InvalidMethodIterativeStoppingFieldError", "InvalidMethodMonotoneQualityError", "InvalidMethodNameError", + "InvalidMethodNeededInputKindsError", "InvalidMethodNeededSuppliesError", 
"InvalidMethodParametersSchemaError", "InvalidMethodVersionTagError", diff --git a/apps/api/src/cora/recipe/aggregates/method/events.py b/apps/api/src/cora/recipe/aggregates/method/events.py index b42fc880243..6a3ec662476 100644 --- a/apps/api/src/cora/recipe/aggregates/method/events.py +++ b/apps/api/src/cora/recipe/aggregates/method/events.py @@ -64,6 +64,15 @@ class MethodDefined: field fold via `payload.get("needed_assembly_ids", ())`. Values are sorted by string form in `to_payload` for persistence determinism (matches needed_family_ids). Default empty tuple. + + `needed_input_kinds` (additive evolution) carries the kinds/roles + of input Datasets the Method consumes (for example + "raw-projections", "flat-field", "prior-reconstruction"), NOT + Dataset instance ids. Eventual-consistency: resolved to concrete + input Datasets at Plan-bind / start_run. Older events without the + field fold via `payload.get("needed_input_kinds", ())`. Values are + sorted lexically in `to_payload` for persistence determinism + (matches needed_supplies). Default empty tuple. """ method_id: UUID @@ -71,6 +80,7 @@ class MethodDefined: needed_family_ids: tuple[UUID, ...] occurred_at: datetime needed_supplies: tuple[str, ...] = () + needed_input_kinds: tuple[str, ...] = () # additive evolution: capability_id points to the # universal Capability template this Method realizes. Defaults # None for older events without the field (additive-state pattern); current decider @@ -274,6 +284,7 @@ def to_payload(event: MethodEvent) -> dict[str, Any]: name=name, needed_family_ids=needed_family_ids, needed_supplies=needed_supplies, + needed_input_kinds=needed_input_kinds, capability_id=capability_id, needed_assembly_ids=needed_assembly_ids, execution_pattern=execution_pattern, @@ -289,6 +300,10 @@ def to_payload(event: MethodEvent) -> dict[str, Any]: # deterministic payload bytes (matches needed_family_ids # convention; same idempotency-hash story). 
"needed_supplies": sorted(needed_supplies), + # additive: input-kind strings sorted lexically for + # deterministic payload bytes (matches needed_supplies + # convention). Always rendered in the event payload. + "needed_input_kinds": sorted(needed_input_kinds), # additive: capability_id is None on older events # without the field; the from_stored fallback to None # preserves legacy stream replay. @@ -416,6 +431,10 @@ def _build_method_defined() -> MethodDefined: # payloads have no needed_supplies key; default to empty # tuple. Additive-evolution pattern. needed_supplies=tuple(payload.get("needed_supplies", ())), + # forward-compat: older MethodDefined payloads have no + # needed_input_kinds key; default to empty tuple. + # Additive-evolution pattern. + needed_input_kinds=tuple(payload.get("needed_input_kinds", ())), # forward-compat: older MethodDefined payloads # have no capability_id key; default to None. Currently # the decider enforces non-None at write time. diff --git a/apps/api/src/cora/recipe/aggregates/method/evolver.py b/apps/api/src/cora/recipe/aggregates/method/evolver.py index 902f37d42e5..f0dc45ae04e 100644 --- a/apps/api/src/cora/recipe/aggregates/method/evolver.py +++ b/apps/api/src/cora/recipe/aggregates/method/evolver.py @@ -49,9 +49,10 @@ **Critical invariant**: every transition arm MUST carry `needed_family_ids`, `version`, `parameters_schema`, -`needed_supplies`, `capability_id`, `needed_assembly_ids`, AND the -compute-classification fields (`execution_pattern`, -`monotone_quality`, `resumable_from_checkpoint`), AND `launch_spec` +`needed_supplies`, `needed_input_kinds`, `capability_id`, +`needed_assembly_ids`, AND the compute-classification fields +(`execution_pattern`, `monotone_quality`, +`resumable_from_checkpoint`), AND `launch_spec` through from prior state. 
Constructing `Method(id=..., name=..., status=...)` without explicitly passing the additive frozenset/optional fields would silently WIPE them to @@ -108,6 +109,7 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: name=name, needed_family_ids=needed_family_ids, needed_supplies=needed_supplies, + needed_input_kinds=needed_input_kinds, capability_id=capability_id, needed_assembly_ids=needed_assembly_ids, execution_pattern=execution_pattern, @@ -122,6 +124,9 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: status=MethodStatus.DEFINED, # version defaults to None. needed_supplies=frozenset(needed_supplies), + # needed_input_kinds flows through genesis. Empty for + # legacy streams without the field (additive-state default). + needed_input_kinds=frozenset(needed_input_kinds), # capability_id flows through genesis. None for # legacy streams without the field (additive-state default). capability_id=capability_id, @@ -151,6 +156,11 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: content_hash=content_hash, parameters_schema=prior.parameters_schema, needed_supplies=prior.needed_supplies, + # needed_input_kinds PRESERVED across every transition + # (part of content identity; omitting it would silently + # wipe the field to empty, the critical invariant the + # evolver docstring warns about). + needed_input_kinds=prior.needed_input_kinds, # capability_id PRESERVED across versioning (Method # operates as the same Capability executor across # revisions; rebinding would mean a new Method). @@ -188,6 +198,11 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: content_hash=prior.content_hash, parameters_schema=prior.parameters_schema, needed_supplies=prior.needed_supplies, + # needed_input_kinds PRESERVED across every transition + # (part of content identity; omitting it would silently + # wipe the field to empty, the critical invariant the + # evolver docstring warns about). 
+ needed_input_kinds=prior.needed_input_kinds, # capability_id PRESERVED across deprecation; audit- # critical (the historical Capability binding stays # visible). @@ -229,6 +244,11 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: dict(parameters_schema) if parameters_schema is not None else None ), needed_supplies=prior.needed_supplies, + # needed_input_kinds PRESERVED across every transition + # (part of content identity; omitting it would silently + # wipe the field to empty, the critical invariant the + # evolver docstring warns about). + needed_input_kinds=prior.needed_input_kinds, # capability_id PRESERVED across schema updates; # parameters_schema and capability binding evolve # independently. @@ -263,6 +283,11 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: content_hash=prior.content_hash, parameters_schema=prior.parameters_schema, needed_supplies=prior.needed_supplies, + # needed_input_kinds PRESERVED across every transition + # (part of content identity; omitting it would silently + # wipe the field to empty, the critical invariant the + # evolver docstring warns about). + needed_input_kinds=prior.needed_input_kinds, capability_id=prior.capability_id, needed_assembly_ids=prior.needed_assembly_ids, execution_pattern=prior.execution_pattern, @@ -308,6 +333,11 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: content_hash=prior.content_hash, parameters_schema=prior.parameters_schema, needed_supplies=prior.needed_supplies, + # needed_input_kinds PRESERVED across every transition + # (part of content identity; omitting it would silently + # wipe the field to empty, the critical invariant the + # evolver docstring warns about). 
+ needed_input_kinds=prior.needed_input_kinds, capability_id=prior.capability_id, needed_assembly_ids=prior.needed_assembly_ids, # compute classification PRESERVED across every transition @@ -339,6 +369,11 @@ def evolve(state: Method | None, event: MethodEvent) -> Method: content_hash=prior.content_hash, parameters_schema=prior.parameters_schema, needed_supplies=prior.needed_supplies, + # needed_input_kinds PRESERVED across every transition + # (part of content identity; omitting it would silently + # wipe the field to empty, the critical invariant the + # evolver docstring warns about). + needed_input_kinds=prior.needed_input_kinds, capability_id=prior.capability_id, needed_assembly_ids=prior.needed_assembly_ids, # compute classification PRESERVED across every transition diff --git a/apps/api/src/cora/recipe/aggregates/method/state.py b/apps/api/src/cora/recipe/aggregates/method/state.py index f7f2b51819a..9b1b5d8ac3d 100644 --- a/apps/api/src/cora/recipe/aggregates/method/state.py +++ b/apps/api/src/cora/recipe/aggregates/method/state.py @@ -109,6 +109,15 @@ # [[project_supply_design]] §"Method.needed_supplies consumer" # for the design lock. METHOD_NEEDED_SUPPLY_KIND_MAX_LENGTH = 50 +# needed_input_kinds element bounds. The kinds/roles of input Datasets +# a Method consumes (for example "raw-projections", "flat-field", +# "prior-reconstruction"); free-string labels resolved to concrete +# input Datasets at Plan-bind / start_run. 50-char ceiling matches the +# needed_supplies kind bound so per-element validation in the Method +# decider stays consistent across the two consumes-axis fields. Input +# kinds are NOT SupplyKind (no DeferredVocabulary marker; see the +# Method state field docstring). +METHOD_NEEDED_INPUT_KIND_MAX_LENGTH = 50 # RoleName bound. Method-local labels for positional role-tagging # (IEC 81346 Function aspect; see [[project-method-required-roles-design]] # and [[project-equipment-isa-gap-research]]). 
Free-string within the @@ -263,6 +272,31 @@ def __init__(self, value: str) -> None: self.value = value +class InvalidMethodNeededInputKindsError(ValueError): + """One of the supplied needed_input_kinds strings is empty, + whitespace-only, or too long. + + Validated at the API boundary via Pydantic per-element + `min_length=1, max_length=50`, AND defensively at the decider via + this error so direct in-process callers (sagas, tests) get the + same protection. The diagnostic carries the offending element. + + Mirrors `InvalidMethodNeededSuppliesError` shape. needed_input_kinds + references the kinds/roles of input Datasets a Method consumes + (eventual-consistency: resolved to concrete input Datasets at + Plan-bind / start_run, never verified at decide time). Unlike + needed_supplies the element type carries no DeferredVocabulary + marker: input kinds are free-string labels, not SupplyKind values. + """ + + def __init__(self, value: str) -> None: + super().__init__( + f"Method needed_input_kinds kind must be 1-{METHOD_NEEDED_INPUT_KIND_MAX_LENGTH} " + f"chars after trimming (got: {value!r})" + ) + self.value = value + + class InvalidMethodVersionTagError(ValueError): """The supplied version tag is empty, whitespace-only, or too long. @@ -650,6 +684,16 @@ class Method: trigger_doc="Supply.kind Watch item 4 trigger per project-structural-scope-design", ), ] = field(default_factory=frozenset[str]) + # needed_input_kinds names the kinds/roles of input Datasets this + # Method consumes (for example "raw-projections", "flat-field", + # "prior-reconstruction"). Eventual-consistency: NOT verified at + # decide time, resolved to concrete input Datasets at Plan-bind / + # start_run. Defaults to empty frozenset (additive-state pattern; + # legacy MethodDefined-only streams fold cleanly via payload.get + # default). 
A plain frozenset[str] with no DeferredVocabulary + # marker: input kinds are free-string labels, NOT SupplyKind, so + # they do not graduate in lockstep with Supply.kind. + needed_input_kinds: frozenset[str] = field(default_factory=frozenset[str]) # needed_assembly_ids references Assembly aggregates (Equipment BC) # by UUID. Declares "this Method needs a specific composition # blueprint" (e.g., the Microscope fixture), not just N @@ -724,8 +768,15 @@ def content_subset(self) -> dict[str, object]: it participates in content identity (anti-hook #10) at the same site as the field itself; deciders and drift-detection helpers call this rather than re-listing the subset. + + `needed_input_kinds` is rendered CONDITIONALLY: the key appears + only when the frozenset is non-empty. This preserves + content_hash byte-stability for Methods defined before the + field existed (no spurious `"needed_input_kinds": []` in the + canonical bytes), the same conditional-render precedent + `_canonical_role_requirement` uses for role_kind. """ - return { + subset: dict[str, object] = { "name": self.name.value, "parameters_schema": self.parameters_schema, "capability_id": str(self.capability_id) if self.capability_id is not None else None, @@ -773,6 +824,12 @@ def content_subset(self) -> dict[str, object]: key=lambda r: str(r["role_name"]), ), } + # Conditional render keeps the canonical bytes (and so the + # content_hash) byte-stable for Methods that predate this field: + # the key is inserted only when there is something to hash. 
+ if self.needed_input_kinds: + subset["needed_input_kinds"] = sorted(self.needed_input_kinds) + return subset def _canonical_role_requirement(role: RoleRequirement) -> dict[str, object]: diff --git a/apps/api/src/cora/recipe/features/define_method/command.py b/apps/api/src/cora/recipe/features/define_method/command.py index 26c54c52ae4..2c662cc1efd 100644 --- a/apps/api/src/cora/recipe/features/define_method/command.py +++ b/apps/api/src/cora/recipe/features/define_method/command.py @@ -38,6 +38,14 @@ class DefineMethod: (sample-cleaning Methods need no supplies). Same hashability + `_normalize_for_hash` story as needed_family_ids. + `needed_input_kinds` is a frozenset of input-Dataset kind STRINGS + the Method consumes (for example "raw-projections", "flat-field", + "prior-reconstruction"), NOT Dataset instance UUIDs. + Eventual-consistency: resolved to concrete input Datasets at + Plan-bind / start_run. Default empty frozenset (a Method that + consumes no prior Datasets). Same hashability + `_normalize_for_hash` + story as needed_supplies. + `capability_id` points to the universal Capability template (Recipe BC) this Method realizes as a Method-shaped executor. REQUIRED per Pattern P from @@ -63,6 +71,7 @@ class DefineMethod: execution_pattern: ExecutionPattern needed_family_ids: frozenset[UUID] = field(default_factory=frozenset[UUID]) needed_supplies: frozenset[str] = field(default_factory=frozenset[str]) + needed_input_kinds: frozenset[str] = field(default_factory=frozenset[str]) # needed_assembly_ids declares the Method's cross-BC dependency on # Equipment Assemblies (composition blueprints). 
Empty means "no # specific composition required, just N Assets satisfying diff --git a/apps/api/src/cora/recipe/features/define_method/decider.py b/apps/api/src/cora/recipe/features/define_method/decider.py index 4f6345b3563..a8a60188dd8 100644 --- a/apps/api/src/cora/recipe/features/define_method/decider.py +++ b/apps/api/src/cora/recipe/features/define_method/decider.py @@ -30,9 +30,11 @@ ExecutorShape, ) from cora.recipe.aggregates.method import ( + METHOD_NEEDED_INPUT_KIND_MAX_LENGTH, METHOD_NEEDED_SUPPLY_KIND_MAX_LENGTH, ExecutionPattern, InvalidMethodMonotoneQualityError, + InvalidMethodNeededInputKindsError, InvalidMethodNeededSuppliesError, Method, MethodAlreadyExistsError, @@ -63,6 +65,8 @@ def decide( (via MethodName VO) - Each needed_supplies kind must be valid -> InvalidMethodNeededSuppliesError + - Each needed_input_kinds kind must be valid + -> InvalidMethodNeededInputKindsError - monotone_quality=True requires execution_pattern == ITERATIVE -> InvalidMethodMonotoneQualityError @@ -102,12 +106,25 @@ def decide( error_class=InvalidMethodNeededSuppliesError, ) trimmed_supplies.append(trimmed) + # defensive per-element validation for needed_input_kinds strings + # (mirrors the needed_supplies pass: Pydantic catches this at the + # API; this protects direct in-process callers AND trims each kind + # so persisted bytes are deterministic). 
+ trimmed_input_kinds: list[str] = [] + for kind in command.needed_input_kinds: + trimmed = validate_bounded_text( + kind, + max_length=METHOD_NEEDED_INPUT_KIND_MAX_LENGTH, + error_class=InvalidMethodNeededInputKindsError, + ) + trimmed_input_kinds.append(trimmed) return [ MethodDefined( method_id=new_id, name=name.value, needed_family_ids=tuple(command.needed_family_ids), needed_supplies=tuple(trimmed_supplies), + needed_input_kinds=tuple(trimmed_input_kinds), capability_id=command.capability_id, needed_assembly_ids=tuple(command.needed_assembly_ids), execution_pattern=command.execution_pattern, diff --git a/apps/api/src/cora/recipe/routes.py b/apps/api/src/cora/recipe/routes.py index b47c3e57328..ce776aceef1 100644 --- a/apps/api/src/cora/recipe/routes.py +++ b/apps/api/src/cora/recipe/routes.py @@ -21,7 +21,7 @@ append entries without restructuring. - 400 (validation): InvalidMethodNameError, InvalidMethodParametersSchemaError, - InvalidMethodNeededSuppliesError + InvalidMethodNeededSuppliesError, InvalidMethodNeededInputKindsError - 404 (load miss): MethodNotFoundError - 409 (defensive guard for AlreadyExists): MethodAlreadyExistsError - 409 (transition guards): future CannotError families @@ -48,6 +48,7 @@ InvalidMethodIterativeStoppingFieldError, InvalidMethodMonotoneQualityError, InvalidMethodNameError, + InvalidMethodNeededInputKindsError, InvalidMethodNeededSuppliesError, InvalidMethodParametersSchemaError, InvalidMethodVersionTagError, @@ -289,6 +290,7 @@ def register_recipe_routes(app: FastAPI) -> None: InvalidCapabilityVersionTagError, InvalidExecutorShapesError, InvalidMethodNameError, + InvalidMethodNeededInputKindsError, InvalidMethodNeededSuppliesError, InvalidMethodParametersSchemaError, InvalidMethodVersionTagError, diff --git a/apps/api/tests/unit/recipe/test_define_method_handler.py b/apps/api/tests/unit/recipe/test_define_method_handler.py index e280ccefa1b..5c2692cda86 100644 --- a/apps/api/tests/unit/recipe/test_define_method_handler.py +++ 
b/apps/api/tests/unit/recipe/test_define_method_handler.py @@ -97,6 +97,8 @@ async def test_handler_appends_method_defined_event_to_store() -> None: "needed_family_ids": sorted([str(_CAP1), str(_CAP2)]), # needed_supplies. Pinned by test_method_needed_supplies.py. "needed_supplies": [], + # needed_input_kinds. Pinned by test_method_needed_input_kinds.py. + "needed_input_kinds": [], # needed_assembly_ids. Pinned by test_method_needed_assembly_ids.py. "needed_assembly_ids": [], # and round-trips through MethodDefined as a UUID string. diff --git a/apps/api/tests/unit/recipe/test_method_events.py b/apps/api/tests/unit/recipe/test_method_events.py index 704c3f25309..6dcd93934bf 100644 --- a/apps/api/tests/unit/recipe/test_method_events.py +++ b/apps/api/tests/unit/recipe/test_method_events.py @@ -77,6 +77,9 @@ def test_to_payload_serializes_method_defined_to_primitives() -> None: # needed_supplies (default factory). Sorted lexically when # populated; pinned by tests/unit/recipe/test_method_needed_supplies.py. "needed_supplies": [], + # needed_input_kinds (default factory). Sorted lexically when + # populated; pinned by tests/unit/recipe/test_method_needed_input_kinds.py. + "needed_input_kinds": [], # (default). 6l-strict will require the field on the command; # the payload key stays additive for stream-replay compat. 
"capability_id": None, diff --git a/apps/api/tests/unit/recipe/test_method_evolver.py b/apps/api/tests/unit/recipe/test_method_evolver.py index 41993fef8e1..b0886f8558d 100644 --- a/apps/api/tests/unit/recipe/test_method_evolver.py +++ b/apps/api/tests/unit/recipe/test_method_evolver.py @@ -22,6 +22,7 @@ from cora.recipe.aggregates.method.events import ( MethodDefined, MethodDeprecated, + MethodLaunchSpecUpdated, MethodParametersSchemaUpdated, MethodRequiredRoleAdded, MethodRequiredRoleRemoved, @@ -756,3 +757,108 @@ def test_evolve_method_required_role_removed_preserves_compute_classification() assert without_role.execution_pattern is ExecutionPattern.ITERATIVE assert without_role.monotone_quality is True assert without_role.resumable_from_checkpoint is True + + +# ---------- needed_input_kinds preservation per transition ---------- + + +_INPUT_KINDS = frozenset({"raw-projections", "flat-field"}) + + +def _defined_with_input_kinds( + method_id: UUID, *, status: MethodStatus, version: str | None +) -> Method: + return Method( + id=method_id, + name=MethodName("Tomographic Reconstruction"), + needed_family_ids=frozenset(), + status=status, + version=version, + needed_input_kinds=_INPUT_KINDS, + ) + + +@pytest.mark.unit +def test_evolve_method_versioned_preserves_needed_input_kinds() -> None: + """Critical pin: needed_input_kinds MUST carry through the version + transition (part of content identity; omitting it in this evolver + arm would silently wipe the field to empty).""" + method_id = uuid4() + prior = _defined_with_input_kinds(method_id, status=MethodStatus.DEFINED, version=None) + versioned = evolve( + prior, MethodVersioned(method_id=method_id, version_tag="v2", occurred_at=_NOW) + ) + assert versioned.needed_input_kinds == _INPUT_KINDS + + +@pytest.mark.unit +def test_evolve_method_deprecated_preserves_needed_input_kinds() -> None: + method_id = uuid4() + prior = _defined_with_input_kinds(method_id, status=MethodStatus.VERSIONED, version="v1") + deprecated = 
evolve(prior, MethodDeprecated(method_id=method_id, occurred_at=_NOW)) + assert deprecated.needed_input_kinds == _INPUT_KINDS + + +@pytest.mark.unit +def test_evolve_method_parameters_schema_updated_preserves_needed_input_kinds() -> None: + method_id = uuid4() + prior = _defined_with_input_kinds(method_id, status=MethodStatus.DEFINED, version=None) + updated = evolve( + prior, + MethodParametersSchemaUpdated( + method_id=method_id, parameters_schema=_SCHEMA_A, occurred_at=_NOW + ), + ) + assert updated.needed_input_kinds == _INPUT_KINDS + + +@pytest.mark.unit +def test_evolve_method_launch_spec_updated_preserves_needed_input_kinds() -> None: + """Launch-spec update is orthogonal to the consumes axis; it must + carry needed_input_kinds through. None clears the spec without + touching the field.""" + method_id = uuid4() + prior = _defined_with_input_kinds(method_id, status=MethodStatus.DEFINED, version=None) + updated = evolve( + prior, + MethodLaunchSpecUpdated(method_id=method_id, launch_spec=None, occurred_at=_NOW), + ) + assert updated.needed_input_kinds == _INPUT_KINDS + + +@pytest.mark.unit +def test_evolve_method_required_role_added_preserves_needed_input_kinds() -> None: + method_id = uuid4() + prior = _defined_with_input_kinds(method_id, status=MethodStatus.DEFINED, version=None) + with_role = evolve( + prior, + MethodRequiredRoleAdded( + method_id=method_id, + role_name="detector", + family_id=uuid4(), + required_ports=(), + optional=False, + occurred_at=_NOW, + ), + ) + assert with_role.needed_input_kinds == _INPUT_KINDS + + +@pytest.mark.unit +def test_evolve_method_required_role_removed_preserves_needed_input_kinds() -> None: + method_id = uuid4() + role = RoleRequirement(role_name=RoleName("detector"), family_id=uuid4()) + prior = Method( + id=method_id, + name=MethodName("Tomographic Reconstruction"), + needed_family_ids=frozenset(), + status=MethodStatus.DEFINED, + needed_input_kinds=_INPUT_KINDS, + required_roles=frozenset({role}), + ) + without_role 
= evolve( + prior, + MethodRequiredRoleRemoved(method_id=method_id, role_name="detector", occurred_at=_NOW), + ) + assert without_role.required_roles == frozenset() + assert without_role.needed_input_kinds == _INPUT_KINDS diff --git a/apps/api/tests/unit/recipe/test_method_needed_input_kinds.py b/apps/api/tests/unit/recipe/test_method_needed_input_kinds.py new file mode 100644 index 00000000000..78d9d21470a --- /dev/null +++ b/apps/api/tests/unit/recipe/test_method_needed_input_kinds.py @@ -0,0 +1,451 @@ +"""Unit tests for Method.needed_input_kinds. + +Covers: + - State default + frozenset shape + - Decider per-element validation (whitespace-only, oversized, trims) + - Decider accepts empty + populated + - Event payload sorted lexically (deterministic hash) + - Event roundtrip preserves needed_input_kinds + - Legacy event payload (no needed_input_kinds key) folds via additive + evolution to empty frozenset (forward-compat critical pin) + - Evolver fold for MethodDefined sets the field + - Each transition (Versioned, Deprecated, ParametersSchemaUpdated) + PRESERVES needed_input_kinds through (preserve-fields invariant) + - content_subset() includes "needed_input_kinds" (sorted) only when + non-empty AND omits the key entirely when empty (byte-stability) + +needed_input_kinds elements are input-Dataset kind STRINGS the Method +consumes (for example "raw-projections", "flat-field", +"prior-reconstruction"), never Dataset instance UUIDs. Unlike +needed_supplies the element type carries no DeferredVocabulary marker. 
+""" + +from datetime import UTC, datetime +from uuid import UUID, uuid4 + +import pytest + +from cora.infrastructure.ports.event_store import StoredEvent +from cora.recipe.aggregates.capability import ( + Capability, + CapabilityCode, + CapabilityName, + ExecutorShape, +) +from cora.recipe.aggregates.method import ( + ExecutionPattern, + InvalidMethodNeededInputKindsError, + Method, + MethodDefined, + MethodDeprecated, + MethodName, + MethodParametersSchemaUpdated, + MethodStatus, + MethodVersioned, + event_type_name, + evolve, + fold, + from_stored, + to_payload, +) +from cora.recipe.features import define_method +from cora.recipe.features.define_method import DefineMethod + + +def _capability() -> Capability: + """Shared Capability fixture for these decider tests.""" + return Capability( + id=UUID("01900000-0000-7000-8000-00000000c1da"), + code=CapabilityCode("cora.capability.x"), + name=CapabilityName("X"), + executor_shapes=frozenset({ExecutorShape.METHOD}), + ) + + +_CAP = _capability() + +_NOW = datetime(2026, 5, 14, 12, 0, 0, tzinfo=UTC) + + +# ---------- Method state shape ---------- + + +@pytest.mark.unit +def test_method_state_defaults_needed_input_kinds_to_empty_frozenset() -> None: + """Legacy Methods (no payload key) and freshly-defined Methods + that don't declare input kinds both land at empty. 
The + default-factory keeps state shape uniform.""" + method = Method( + id=uuid4(), + name=MethodName("X"), + ) + assert method.needed_input_kinds == frozenset() + + +@pytest.mark.unit +def test_method_state_carries_supplied_needed_input_kinds() -> None: + method = Method( + id=uuid4(), + name=MethodName("Tomographic Reconstruction"), + needed_input_kinds=frozenset({"raw-projections", "flat-field"}), + ) + assert method.needed_input_kinds == frozenset({"raw-projections", "flat-field"}) + + +# ---------- Decider validation ---------- + + +@pytest.mark.unit +def test_decider_accepts_empty_needed_input_kinds() -> None: + """A Method that consumes no prior Datasets is valid (the field + defaults empty).""" + events = define_method.decide( + state=None, + command=DefineMethod( + execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="Acquisition", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset(), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + assert events[0].needed_input_kinds == () + + +@pytest.mark.unit +def test_decider_accepts_populated_needed_input_kinds() -> None: + events = define_method.decide( + state=None, + command=DefineMethod( + execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="Tomographic Reconstruction", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset({"raw-projections", "flat-field"}), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + assert set(events[0].needed_input_kinds) == {"raw-projections", "flat-field"} + + +@pytest.mark.unit +def test_decider_trims_each_input_kind_string() -> None: + """Each kind goes through validate_bounded_text (1-50 chars, + trimmed). 
Mirrors the needed_supplies trim behavior.""" + events = define_method.decide( + state=None, + command=DefineMethod( + execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="X", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset({" raw-projections ", "flat-field"}), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + assert "raw-projections" in set(events[0].needed_input_kinds) + assert " raw-projections " not in set(events[0].needed_input_kinds) + + +@pytest.mark.unit +def test_decider_rejects_whitespace_only_input_kind() -> None: + with pytest.raises(InvalidMethodNeededInputKindsError): + define_method.decide( + state=None, + command=DefineMethod( + execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="X", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset({" "}), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + + +@pytest.mark.unit +def test_decider_rejects_empty_input_kind() -> None: + with pytest.raises(InvalidMethodNeededInputKindsError): + define_method.decide( + state=None, + command=DefineMethod( + execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="X", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset({""}), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + + +@pytest.mark.unit +def test_decider_rejects_oversized_input_kind() -> None: + """Per-element bound is 50 chars (mirrors the needed_supplies bound).""" + with pytest.raises(InvalidMethodNeededInputKindsError): + define_method.decide( + state=None, + command=DefineMethod( + execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="X", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset({"x" * 51}), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + + +@pytest.mark.unit +def test_decider_accepts_max_length_input_kind() -> None: + boundary = "x" * 50 + events = define_method.decide( + state=None, + command=DefineMethod( + 
execution_pattern=ExecutionPattern.BATCH, + capability_id=_CAP.id, + name="X", + needed_family_ids=frozenset(), + needed_input_kinds=frozenset({boundary}), + ), + capability=_CAP, + now=_NOW, + new_id=uuid4(), + ) + assert boundary in set(events[0].needed_input_kinds) + + +# ---------- Event payload determinism ---------- + + +@pytest.mark.unit +def test_to_payload_sorts_needed_input_kinds_lexically() -> None: + """Same logical kind set must serialize to the same payload bytes + (idempotency-hash determinism). Sorting in to_payload is the + contract.""" + event = MethodDefined( + method_id=uuid4(), + name="X", + needed_family_ids=(), + needed_input_kinds=("raw-projections", "flat-field", "prior-reconstruction"), + occurred_at=_NOW, + ) + payload = to_payload(event) + assert payload["needed_input_kinds"] == [ + "flat-field", + "prior-reconstruction", + "raw-projections", + ] + + +@pytest.mark.unit +def test_event_round_trips_with_needed_input_kinds() -> None: + original = MethodDefined( + method_id=uuid4(), + name="Tomographic Reconstruction", + needed_family_ids=(), + needed_input_kinds=("raw-projections", "flat-field"), + occurred_at=_NOW, + ) + stored = _stored("MethodDefined", to_payload(original)) + rebuilt = from_stored(stored) + assert isinstance(rebuilt, MethodDefined) + assert set(rebuilt.needed_input_kinds) == {"raw-projections", "flat-field"} + + +# ---------- Legacy backward-compat (additive evolution) ---------- + + +@pytest.mark.unit +def test_legacy_event_payload_folds_with_empty_needed_input_kinds() -> None: + """Critical forward-compat pin. Legacy MethodDefined payloads + have NO needed_input_kinds key. 
additive-evolution: from_stored uses + payload.get(..., default), so the rebuilt event carries empty + tuple, and the evolver folds into empty frozenset.""" + legacy_payload: dict[str, object] = { + "method_id": str(uuid4()), + "name": "Legacy Method", + "needed_family_ids": [], + "occurred_at": _NOW.isoformat(), + # No needed_input_kinds key -- legacy payload shape. + } + stored = _stored("MethodDefined", legacy_payload) + rebuilt = from_stored(stored) + assert isinstance(rebuilt, MethodDefined) + assert rebuilt.needed_input_kinds == () + state = evolve(None, rebuilt) + assert state.needed_input_kinds == frozenset() + + +# ---------- Evolver fold ---------- + + +@pytest.mark.unit +def test_evolve_method_defined_sets_needed_input_kinds() -> None: + method_id = uuid4() + event = MethodDefined( + method_id=method_id, + name="Tomographic Reconstruction", + needed_family_ids=(), + needed_input_kinds=("raw-projections", "flat-field"), + occurred_at=_NOW, + ) + state = evolve(None, event) + assert state.needed_input_kinds == frozenset({"raw-projections", "flat-field"}) + + +# ---------- Preserve-fields invariant per transition ---------- + + +def _seed_state(input_kinds: frozenset[str]) -> Method: + return evolve( + None, + MethodDefined( + method_id=uuid4(), + name="Tomographic Reconstruction", + needed_family_ids=(), + needed_input_kinds=tuple(input_kinds), + occurred_at=_NOW, + ), + ) + + +@pytest.mark.unit +def test_evolve_method_versioned_preserves_needed_input_kinds() -> None: + seed = _seed_state(frozenset({"raw-projections"})) + after = evolve(seed, MethodVersioned(method_id=seed.id, version_tag="v2", occurred_at=_NOW)) + assert after.needed_input_kinds == frozenset({"raw-projections"}) + assert after.status is MethodStatus.VERSIONED + + +@pytest.mark.unit +def test_evolve_method_deprecated_preserves_needed_input_kinds() -> None: + seed = _seed_state(frozenset({"raw-projections", "flat-field"})) + after = evolve(seed, MethodDeprecated(method_id=seed.id, 
occurred_at=_NOW)) + assert after.needed_input_kinds == frozenset({"raw-projections", "flat-field"}) + assert after.status is MethodStatus.DEPRECATED + + +@pytest.mark.unit +def test_evolve_method_parameters_schema_updated_preserves_needed_input_kinds() -> None: + """Orthogonal-facet update must NOT wipe needed_input_kinds.""" + seed = _seed_state(frozenset({"raw-projections"})) + after = evolve( + seed, + MethodParametersSchemaUpdated( + method_id=seed.id, + parameters_schema={"type": "object"}, + occurred_at=_NOW, + ), + ) + assert after.needed_input_kinds == frozenset({"raw-projections"}) + + +@pytest.mark.unit +def test_fold_full_lifecycle_preserves_needed_input_kinds() -> None: + """End-to-end: defined -> versioned -> schema-updated -> deprecated. + needed_input_kinds survives the whole chain.""" + method_id = uuid4() + state = fold( + [ + MethodDefined( + method_id=method_id, + name="Tomographic Reconstruction", + needed_family_ids=(), + needed_input_kinds=("raw-projections", "flat-field"), + occurred_at=_NOW, + ), + MethodVersioned(method_id=method_id, version_tag="v2", occurred_at=_NOW), + MethodParametersSchemaUpdated( + method_id=method_id, + parameters_schema={"type": "object"}, + occurred_at=_NOW, + ), + MethodDeprecated(method_id=method_id, occurred_at=_NOW), + ] + ) + assert state is not None + assert state.needed_input_kinds == frozenset({"raw-projections", "flat-field"}) + assert state.status is MethodStatus.DEPRECATED + + +# ---------- content_subset conditional render (byte-stability) ---------- + + +@pytest.mark.unit +def test_content_subset_includes_sorted_needed_input_kinds_when_non_empty() -> None: + """A non-empty frozenset renders as a sorted list under the + "needed_input_kinds" key so the content_hash is deterministic.""" + method = Method( + id=uuid4(), + name=MethodName("Tomographic Reconstruction"), + needed_input_kinds=frozenset({"raw-projections", "flat-field"}), + ) + subset = method.content_subset() + assert 
subset["needed_input_kinds"] == ["flat-field", "raw-projections"] + + +@pytest.mark.unit +def test_content_subset_omits_needed_input_kinds_key_when_empty() -> None: + """Empty frozenset omits the key entirely so Methods predating the + field keep byte-stable canonical bytes (no spurious empty list).""" + method = Method( + id=uuid4(), + name=MethodName("X"), + ) + subset = method.content_subset() + assert "needed_input_kinds" not in subset + + +# ---------- Helper ---------- + + +def _stored(event_type: str, payload: dict[str, object]) -> StoredEvent: + return StoredEvent( + position=1, + event_id=uuid4(), + stream_type="Method", + stream_id=uuid4(), + version=1, + event_type=event_type, + schema_version=1, + payload=payload, + correlation_id=uuid4(), + causation_id=None, + occurred_at=_NOW, + recorded_at=_NOW, + ) + + +# ---------- event_type_name (sanity) ---------- + + +@pytest.mark.unit +def test_event_type_name_for_method_defined_unchanged_with_input_kinds() -> None: + """The event class name does not change -- additive payload + evolution only. Pinned because subscribers route by event_type.""" + event = MethodDefined( + method_id=uuid4(), + name="X", + needed_family_ids=(), + needed_input_kinds=(), + occurred_at=_NOW, + ) + assert event_type_name(event) == "MethodDefined" + + +# Suppress pyright warnings on the test-only state seed factory. +_ = UUID # marker so the import is referenced (used by stored helper return type). From a90c4917e72493d203360e42c2aa803c2928c6ff Mon Sep 17 00:00:00 2001 From: Doga Gursoy Date: Fri, 26 Jun 2026 08:25:24 +0300 Subject: [PATCH 7/7] feat(run): Run declares input_dataset_ids, leg C sub-slice 3 (triage spike) Adds an optional additive frozenset[UUID] field to the Run aggregate: the input Dataset id(s) a reconstruction consumes (PROV `used`, targeting the Dataset entity), the per-Run binding that resolves Method.needed_input_kinds (C2). 
Mirrors pinned_calibration_ids exactly: id-only atomic refs, NO cross-BC existence check (eventual-consistency), cardinality-validated (<=64) via validate_input_dataset_ids + InvalidInputDatasetsError (registered 400), threaded through the StartRun command, the RunStarted payload (always-rendered), and all 11 reconstructing evolver arms. Optional-by-default and domain-only: no projection column (so no migration), the field is empty for existing callers so the start_run route body and MCP tool are unchanged. The start_run gate that reads each input Dataset's Verified Distribution is the next sub-slice (C4), which needs gate-review before merge. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/cora/run/aggregates/run/__init__.py | 6 + .../api/src/cora/run/aggregates/run/events.py | 19 +- .../src/cora/run/aggregates/run/evolver.py | 28 +- apps/api/src/cora/run/aggregates/run/state.py | 63 +++ .../cora/run/features/start_run/command.py | 9 + .../cora/run/features/start_run/decider.py | 14 + apps/api/src/cora/run/routes.py | 5 + .../test_start_run_handler_postgres.py | 3 + apps/api/tests/unit/run/test_run_events.py | 4 + apps/api/tests/unit/run/test_run_evolver.py | 228 +++++++++++ .../unit/run/test_run_input_dataset_ids.py | 359 ++++++++++++++++++ 11 files changed, 736 insertions(+), 2 deletions(-) create mode 100644 apps/api/tests/unit/run/test_run_input_dataset_ids.py diff --git a/apps/api/src/cora/run/aggregates/run/__init__.py b/apps/api/src/cora/run/aggregates/run/__init__.py index 35e2bf26e36..bd317ff5237 100644 --- a/apps/api/src/cora/run/aggregates/run/__init__.py +++ b/apps/api/src/cora/run/aggregates/run/__init__.py @@ -52,11 +52,13 @@ READING_UNITS_MAX_LENGTH, RUN_EXTERNAL_REF_ID_MAX_LENGTH, RUN_EXTERNAL_REF_SCHEME_MAX_LENGTH, + RUN_INPUT_DATASETS_MAX_ENTRIES, RUN_NAME_MAX_LENGTH, RUN_PINNED_CALIBRATIONS_MAX_ENTRIES, SAMPLING_PROCEDURE_VALUES, ChannelName, InvalidChannelNameError, + InvalidInputDatasetsError, InvalidObservationValueError, 
InvalidPinnedCalibrationsError, InvalidRunAbortReasonError, @@ -101,6 +103,7 @@ RunSupplyCoverageMismatchError, RunTruncateReason, SamplingProcedure, + validate_input_dataset_ids, validate_pinned_calibration_ids, ) @@ -111,6 +114,7 @@ "READING_UNITS_MAX_LENGTH", "RUN_EXTERNAL_REF_ID_MAX_LENGTH", "RUN_EXTERNAL_REF_SCHEME_MAX_LENGTH", + "RUN_INPUT_DATASETS_MAX_ENTRIES", "RUN_NAME_MAX_LENGTH", "RUN_PINNED_CALIBRATIONS_MAX_ENTRIES", "SAMPLING_PROCEDURE_VALUES", @@ -122,6 +126,7 @@ "InMemoryFeedHeartbeatStore", "InMemoryObservationStore", "InvalidChannelNameError", + "InvalidInputDatasetsError", "InvalidObservationValueError", "InvalidPinnedCalibrationsError", "InvalidRunAbortReasonError", @@ -191,5 +196,6 @@ "to_payload", "validate_adjusted_parameters_against_method_schema", "validate_effective_parameters_against_method_schema", + "validate_input_dataset_ids", "validate_pinned_calibration_ids", ] diff --git a/apps/api/src/cora/run/aggregates/run/events.py b/apps/api/src/cora/run/aggregates/run/events.py index c421c50e5c4..392ed9613d0 100644 --- a/apps/api/src/cora/run/aggregates/run/events.py +++ b/apps/api/src/cora/run/aggregates/run/events.py @@ -216,6 +216,16 @@ class RunStarted: # `payload.get("pinned_calibration_ids", [])` returning an empty list # for legacy streams without the field. pinned_calibration_ids: tuple[UUID, ...] = () + # input Dataset references (PROV `used`): the set of + # Dataset ids a reconstruction Run consumes. Each reference targets + # the Dataset, not a Distribution. Tuple (not frozenset) on the + # event payload for deterministic byte ordering during replay; the + # evolver reconstructs the frozenset. NO cross-BC existence check at + # the decider (id-only atomic refs; cross-BC eventual-consistency + # stance, same as pinned_calibration_ids). Forward-compat via + # `payload.get("input_dataset_ids", [])` returning an empty list for + # legacy streams without the field. + input_dataset_ids: tuple[UUID, ...] 
= () @dataclass(frozen=True) @@ -628,6 +638,7 @@ def to_payload(event: RunEvent) -> dict[str, Any]: campaign_id=campaign_id, decided_by_decision_id=decided_by_decision_id, pinned_calibration_ids=pinned_calibration_ids, + input_dataset_ids=input_dataset_ids, occurred_at=occurred_at, ): return { @@ -660,6 +671,10 @@ def to_payload(event: RunEvent) -> dict[str, Any]: # deterministic byte ordering (the typed in-memory shape is # frozenset; the wire shape is a sorted list for stable bytes). "pinned_calibration_ids": sorted(str(pin) for pin in pinned_calibration_ids), + # Dataset ids sorted lexicographically for deterministic byte + # ordering (the typed in-memory shape is frozenset; the wire + # shape is a sorted list for stable bytes). + "input_dataset_ids": sorted(str(ds) for ds in input_dataset_ids), "occurred_at": occurred_at.isoformat(), } case RunHeld( @@ -835,7 +850,8 @@ def _build_run_started() -> RunStarted: # `trigger_source`, `external_refs`, # `acknowledged_cautions`, `campaign_id`, # `decided_by_decision_id` (Decision-to-Run linkage), - # `pinned_calibration_ids` (Calibration AsShot anchor) + # `pinned_calibration_ids` (Calibration AsShot anchor), + # `input_dataset_ids` (PROV `used` input Dataset refs) # were all added additively. Each .get(...) returns # the field's default when the key isn't in the jsonb # payload, so legacy streams replay without an upcaster. 
@@ -870,6 +886,7 @@ def _build_run_started() -> RunStarted: pinned_calibration_ids=tuple( UUID(p) for p in payload.get("pinned_calibration_ids", []) ), + input_dataset_ids=tuple(UUID(x) for x in payload.get("input_dataset_ids", [])), occurred_at=datetime.fromisoformat(payload["occurred_at"]), ) diff --git a/apps/api/src/cora/run/aggregates/run/evolver.py b/apps/api/src/cora/run/aggregates/run/evolver.py index fd841d34c5a..c1d9a802ded 100644 --- a/apps/api/src/cora/run/aggregates/run/evolver.py +++ b/apps/api/src/cora/run/aggregates/run/evolver.py @@ -34,7 +34,8 @@ `name`, `plan_id`, `subject_id`, `raid`, `override_parameters`, `effective_parameters`, `trigger_source`, `observation_logbook_id`, `external_refs`, `campaign_id`, `last_adjusted_at`, -`last_adjusted_by`, `adjustment_count`, AND `actuation_kind` through +`last_adjusted_by`, `adjustment_count`, `pinned_calibration_ids`, +`input_dataset_ids`, AND `actuation_kind` through from prior state. Constructing `Run(id=..., name=..., plan_id=..., subject_id=..., status=...)` without explicitly passing the additive fields would silently WIPE them to defaults (empty dict / None / empty @@ -123,6 +124,7 @@ def evolve(state: Run | None, event: RunEvent) -> Run: external_refs=external_refs, campaign_id=campaign_id, pinned_calibration_ids=pinned_calibration_ids, + input_dataset_ids=input_dataset_ids, ): _ = state # RunStarted is the genesis event; prior state ignored. # Shallow-copy the payload dicts into state so mutating either @@ -149,6 +151,10 @@ def evolve(state: Run | None, event: RunEvent) -> Run: # memory equality semantics; the event carries a tuple for # deterministic wire byte ordering). pinned_calibration_ids=frozenset(pinned_calibration_ids), + # Input Dataset refs set at genesis (frozenset for in- + # memory equality; the event carries a tuple for stable + # wire bytes). + input_dataset_ids=frozenset(input_dataset_ids), # No conduct provenance at genesis; a terminal event sets it. 
actuation_kind=None, ) @@ -172,6 +178,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. actuation_kind=prior.actuation_kind, ) @@ -195,6 +203,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. actuation_kind=prior.actuation_kind, ) @@ -218,6 +228,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance: the terminal event carries the # observed kind for a conducted Run; None for a normal # complete issued outside a conduct. @@ -243,6 +255,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance: a failed conduct still taints # (the kind rides the abort event); None for operator # aborts. @@ -268,6 +282,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. 
pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. actuation_kind=prior.actuation_kind, ) @@ -291,6 +307,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. actuation_kind=prior.actuation_kind, ) @@ -328,6 +346,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: # form of the AsShot rule (even mid-flight steering can't # change what calibration the Run was acquired against). pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across mid-flight steering. actuation_kind=prior.actuation_kind, ) @@ -354,6 +374,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. actuation_kind=prior.actuation_kind, ) @@ -382,6 +404,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. 
actuation_kind=prior.actuation_kind, ) @@ -410,6 +434,8 @@ def evolve(state: Run | None, event: RunEvent) -> Run: adjustment_count=prior.adjustment_count, # AsShot invariant: never change after start. pinned_calibration_ids=prior.pinned_calibration_ids, + # Input Dataset refs preserved verbatim across this arm. + input_dataset_ids=prior.input_dataset_ids, # Conduct provenance preserved across non-terminal arms. actuation_kind=prior.actuation_kind, ) diff --git a/apps/api/src/cora/run/aggregates/run/state.py b/apps/api/src/cora/run/aggregates/run/state.py index bbfffa21d1f..137de98a153 100644 --- a/apps/api/src/cora/run/aggregates/run/state.py +++ b/apps/api/src/cora/run/aggregates/run/state.py @@ -124,6 +124,14 @@ # anti-hook #3 — but unbounded set growth would still bloat events + # payloads with no domain justification). RUN_PINNED_CALIBRATIONS_MAX_ENTRIES = 64 +# cardinality cap on the input Dataset ref set +# (Run.input_dataset_ids). Same default + same precedent justification +# as RUN_PINNED_CALIBRATIONS_MAX_ENTRIES: per-entry existence is NOT +# checked at the write path (the cited Datasets are cross-BC eventual- +# consistency references, PROV `used` atomic IDs targeting the Dataset +# not a Distribution) but unbounded set growth would still bloat events +# + payloads with no domain justification. +RUN_INPUT_DATASETS_MAX_ENTRIES = 64 # `Identifier(scheme, value)` carries open-scheme anti-corruption refs # mirroring the Safety BC's ExternalBinding shape (proposal / btr / @@ -1203,6 +1211,19 @@ class Run: # (Q5/Q6 research). Defaults to empty frozenset so legacy streams # without the field fold cleanly via `payload.get("pinned_calibration_ids", [])`. pinned_calibration_ids: frozenset[UUID] = field(default_factory=frozenset[UUID]) + # input Dataset reference set: the Dataset id(s) a + # reconstruction Run consumes (PROV `used`: an Activity used an + # Entity; the reference targets the DATASET, not a Distribution). + # Each entry is a Dataset.id. 
NO cross-BC existence check at the + # write path (id-only atomic refs; cross-BC eventual-consistency + # stance, same as pinned_calibration_ids); only set cardinality is + # validated. Defaults to empty frozenset so legacy streams without + # the field fold cleanly via + # `payload.get("input_dataset_ids", [])` (additive-state pattern). + # The start_run gate will later read each input Dataset's Verified + # Distribution; that read goes through the Data BC, never a fold- + # time check here. + input_dataset_ids: frozenset[UUID] = field(default_factory=frozenset[UUID]) # conduct-observed actuation provenance. None until a terminal # event sets it: only RunCompleted / RunAborted issued by the # compute CONDUCT runtime (`Reckoner`) carry a non-None @@ -1355,3 +1376,45 @@ def validate_pinned_calibration_ids(value: frozenset[UUID]) -> frozenset[UUID]: if len(value) > RUN_PINNED_CALIBRATIONS_MAX_ENTRIES: raise InvalidPinnedCalibrationsError(len(value)) return value + + +class InvalidInputDatasetsError(ValueError): + """The supplied input_dataset_ids set has too many entries. + + Per-entry validation (each is a UUID) is type-enforced; the + set-cardinality cap protects against accidentally massive input- + Dataset reference payloads on a single reconstruction Run start. + Mirrors `InvalidPinnedCalibrationsError` shape exactly (same + precedent + same default cap of 64). Validated at the decider only: + the start_run route body does not carry this field yet, so no + Pydantic `max_length` pre-check exists at the API boundary. + + NO cross-BC existence check on the cited Dataset ids (PROV `used` + atomic-ID model targeting the Dataset, not a Distribution) + + canonical DDD eventual-consistency stance on cross-aggregate rules + (Vernon/Evans). Symmetric to the pinned_calibration_ids decider- + time treatment. + + Mapped to HTTP 400.
+ """ + + def __init__(self, count: int) -> None: + super().__init__( + f"Run input_dataset_ids must have at most " + f"{RUN_INPUT_DATASETS_MAX_ENTRIES} entries (got: {count})" + ) + self.count = count + + +def validate_input_dataset_ids(value: frozenset[UUID]) -> frozenset[UUID]: + """Normalize / validate input_dataset_ids for the Run state and decider. + + Cardinality-only check. NO per-element existence check (PROV `used` + atomic-ID model targeting the Dataset, not a Distribution; cross-BC + eventual-consistency per Vernon/Evans DDD canon). Mirrors + `validate_pinned_calibration_ids` exactly: same shape, same default + cap, same justification. + """ + if len(value) > RUN_INPUT_DATASETS_MAX_ENTRIES: + raise InvalidInputDatasetsError(len(value)) + return value diff --git a/apps/api/src/cora/run/features/start_run/command.py b/apps/api/src/cora/run/features/start_run/command.py index 9a6913b7830..99324e7cfde 100644 --- a/apps/api/src/cora/run/features/start_run/command.py +++ b/apps/api/src/cora/run/features/start_run/command.py @@ -90,3 +90,12 @@ class StartRun: # stance); a downstream consumer that needs to read the pinned # CalibrationRevision still goes through the Calibration BC. pinned_calibration_ids: frozenset[UUID] = field(default_factory=frozenset[UUID]) + # input Dataset references (PROV `used`): the set of + # Dataset ids a reconstruction Run consumes. Each reference targets + # the Dataset, not a Distribution. Operator-supplied (or, in the + # autonomous-CT future, agent-supplied). IMMUTABLE on the Run + # aggregate after start_run, like pinned_calibration_ids. NO cross- + # BC existence check at the decider (cross-BC eventual-consistency + # stance); the start_run gate that reads each input Dataset's + # Verified Distribution goes through the Data BC. 
+ input_dataset_ids: frozenset[UUID] = field(default_factory=frozenset[UUID]) diff --git a/apps/api/src/cora/run/features/start_run/decider.py b/apps/api/src/cora/run/features/start_run/decider.py index 094e7fd4fc1..367a461be94 100644 --- a/apps/api/src/cora/run/features/start_run/decider.py +++ b/apps/api/src/cora/run/features/start_run/decider.py @@ -99,6 +99,7 @@ RunSubjectNotMountableError, check_safety_envelope, validate_effective_parameters_against_method_schema, + validate_input_dataset_ids, validate_pinned_calibration_ids, ) from cora.run.features.start_run.command import StartRun @@ -193,6 +194,9 @@ def decide( - pinned_calibration_ids cardinality must be within bound -> InvalidPinnedCalibrationsError (via validate_pinned_calibration_ids) + - input_dataset_ids cardinality must be within bound + -> InvalidInputDatasetsError + (via validate_input_dataset_ids) `needed_family_ids_snapshot` is the Method's needed_family_ids set the handler resolved transitively from `plan.practice_id → @@ -315,6 +319,13 @@ def decide( # for Dataset.used_calibration_ids exactly. pinned_calibration_ids = validate_pinned_calibration_ids(command.pinned_calibration_ids) + # cardinality cap on the input Dataset reference set + # (PROV `used`). NO cross-BC existence check (id-only atomic refs; + # eventual-consistency stance, same as pinned_calibration_ids). The + # start_run gate that reads each input Dataset's Verified + # Distribution lands separately and goes through the Data BC. + input_dataset_ids = validate_input_dataset_ids(command.input_dataset_ids) + # build the acknowledged_cautions snapshot for the # RunStarted event payload. Per the Caution design memo, this # snapshot IS the ack (anti-pattern #7: ack lives on the @@ -356,6 +367,9 @@ def decide( # payload (frozenset has no inherent order). The cardinality # check ran earlier via validate_pinned_calibration_ids (12b-5). 
pinned_calibration_ids=tuple(sorted(pinned_calibration_ids)), + # sort for deterministic byte-form on the event payload; the + # cardinality check ran earlier via validate_input_dataset_ids. + input_dataset_ids=tuple(sorted(input_dataset_ids)), occurred_at=now, ) ] diff --git a/apps/api/src/cora/run/routes.py b/apps/api/src/cora/run/routes.py index 5c514917140..71e3d368a75 100644 --- a/apps/api/src/cora/run/routes.py +++ b/apps/api/src/cora/run/routes.py @@ -46,6 +46,7 @@ InvalidRunAdjustSchemaError, InvalidRunAdjustReasonError - 409 (Run adjust transition guard, 6j): RunCannotAdjustError - 400 (validation, 12b-5 adds): InvalidPinnedCalibrationsError + - 400 (validation): InvalidInputDatasetsError """ from fastapi import FastAPI, Request, status @@ -53,6 +54,7 @@ from cora.run.aggregates.run import ( InvalidChannelNameError, + InvalidInputDatasetsError, InvalidObservationValueError, InvalidPinnedCalibrationsError, InvalidRunAbortReasonError, @@ -196,6 +198,9 @@ def register_run_routes(app: FastAPI) -> None: # Pin-set cardinality cap on AsShot citation (12b-5; symmetric # to Data BC's InvalidUsedCalibrationsError on register_dataset). InvalidPinnedCalibrationsError, + # Input-Dataset reference set cardinality cap (PROV `used`; + # symmetric to the pinned_calibration_ids cap). + InvalidInputDatasetsError, ): app.add_exception_handler(validation_cls, _handle_validation_error) for not_found_cls in (RunNotFoundError,): diff --git a/apps/api/tests/integration/test_start_run_handler_postgres.py b/apps/api/tests/integration/test_start_run_handler_postgres.py index 06aae9b4b08..8f26c9940f4 100644 --- a/apps/api/tests/integration/test_start_run_handler_postgres.py +++ b/apps/api/tests/integration/test_start_run_handler_postgres.py @@ -203,6 +203,9 @@ async def test_start_run_persists_event_with_full_upstream_chain_against_postgre # pins. Empty tuple by default; forward-compat via # `payload.get("pinned_calibration_ids", [])`. 
"pinned_calibration_ids": [], + # input Dataset refs (PROV `used`). Empty list by default; + # forward-compat via `payload.get("input_dataset_ids", [])`. + "input_dataset_ids": [], "occurred_at": _NOW.isoformat(), } assert stored.event_id == run_event_id diff --git a/apps/api/tests/unit/run/test_run_events.py b/apps/api/tests/unit/run/test_run_events.py index 4eb333ae33c..6c16ecf701a 100644 --- a/apps/api/tests/unit/run/test_run_events.py +++ b/apps/api/tests/unit/run/test_run_events.py @@ -103,6 +103,10 @@ def test_to_payload_serializes_run_started_with_subject_to_primitives() -> None: # StartRun.pinned_calibration_ids was empty; forward-compat via # `payload.get("pinned_calibration_ids", [])`. "pinned_calibration_ids": [], + # sorted list of input Dataset ids (PROV `used`). Empty when + # StartRun.input_dataset_ids was empty; forward-compat via + # `payload.get("input_dataset_ids", [])`. + "input_dataset_ids": [], "occurred_at": _NOW.isoformat(), } diff --git a/apps/api/tests/unit/run/test_run_evolver.py b/apps/api/tests/unit/run/test_run_evolver.py index 5a4a2048171..d5fb70741cd 100644 --- a/apps/api/tests/unit/run/test_run_evolver.py +++ b/apps/api/tests/unit/run/test_run_evolver.py @@ -1233,6 +1233,234 @@ def test_run_removed_from_campaign_preserves_pinned_calibration_ids() -> None: assert state.pinned_calibration_ids == frozenset({pin_a}) +@pytest.mark.unit +def test_run_started_genesis_populates_input_dataset_ids_as_frozenset() -> None: + """RunStarted carries the tuple-form on the event payload; the + evolver coerces to frozenset for in-memory equality semantics.""" + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + ds_b = UUID("01900000-0000-7000-8000-0000000d5002") + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Reconstruction", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a, ds_b), + ), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a, ds_b}) + + 
+@pytest.mark.unit +def test_legacy_run_without_input_dataset_ids_folds_to_empty_frozenset() -> None: + """Legacy Runs have no input_dataset_ids on RunStarted. They MUST + fold to an empty frozenset; additive backward-compat contract.""" + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Legacy Run without input Datasets", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + ), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset() + + +@pytest.mark.unit +@pytest.mark.parametrize( + "terminal_factory", + [_make_completed, _make_aborted, _make_stopped, _make_truncated], +) +def test_each_terminal_preserves_input_dataset_ids( + terminal_factory: _TerminalFactory, +) -> None: + """Critical invariant: every terminal arm preserves the + input_dataset_ids set verbatim. A regression that wiped them would + silently break the PROV `used` lineage of a reconstruction Run.""" + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + ds_b = UUID("01900000-0000-7000-8000-0000000d5002") + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Run", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a, ds_b), + ), + terminal_factory(run_id), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a, ds_b}) + + +@pytest.mark.unit +def test_hold_resume_cycle_preserves_input_dataset_ids() -> None: + """Hold + Resume are routine mid-flight; they must NOT touch the + input Dataset ref set.""" + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Run", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a,), + ), + RunHeld(run_id=run_id, occurred_at=_NOW), + RunResumed(run_id=run_id, occurred_at=_NOW), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a}) + + +@pytest.mark.unit +def 
test_adjust_run_preserves_input_dataset_ids() -> None: + """Even mid-flight parameter steering (adjust_run) MUST preserve the + input Dataset ref set.""" + from cora.run.aggregates.run.events import RunAdjusted + + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Run", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a,), + ), + RunAdjusted( + run_id=run_id, + parameters_patch={"a": 1}, + effective_parameters={"a": 1}, + reason="adjust", + adjusted_by=ActorId(uuid4()), + occurred_at=_NOW, + ), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a}) + + +@pytest.mark.unit +def test_reading_logbook_opened_preserves_input_dataset_ids() -> None: + """Orthogonal arm: lazy logbook open MUST preserve the input Dataset + ref set.""" + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + ds_b = UUID("01900000-0000-7000-8000-0000000d5002") + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Run", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a, ds_b), + ), + RunObservationLogbookOpened( + run_id=run_id, + logbook_id=uuid4(), + kind=LOGBOOK_KIND_OBSERVATION, + schema=OBSERVATION_LOGBOOK_SCHEMA, + occurred_at=_NOW, + ), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a, ds_b}) + + +@pytest.mark.unit +def test_run_added_to_campaign_preserves_input_dataset_ids() -> None: + """Orthogonal arm: post-hoc Campaign membership assignment MUST + preserve the input Dataset ref set.""" + from cora.run.aggregates.run.events import RunAddedToCampaign + + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + run_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Run", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a,), + ), + RunAddedToCampaign( + run_id=run_id, + campaign_id=uuid4(), + 
occurred_at=_NOW, + ), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a}) + + +@pytest.mark.unit +def test_run_removed_from_campaign_preserves_input_dataset_ids() -> None: + """Orthogonal arm: post-hoc Campaign membership removal MUST + preserve the input Dataset ref set.""" + from cora.run.aggregates.run.events import ( + RunAddedToCampaign, + RunRemovedFromCampaign, + ) + + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + run_id = uuid4() + campaign_id = uuid4() + state = fold( + [ + RunStarted( + run_id=run_id, + name="Run", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a,), + ), + RunAddedToCampaign( + run_id=run_id, + campaign_id=campaign_id, + occurred_at=_NOW, + ), + RunRemovedFromCampaign( + run_id=run_id, + campaign_id=campaign_id, + reason="removed", + occurred_at=_NOW, + ), + ] + ) + assert state is not None + assert state.input_dataset_ids == frozenset({ds_a}) + + @pytest.mark.unit def test_decision_debrief_requested_is_audit_only_no_state_mutation() -> None: """The lease marker emitted by Agent BC subscribers is provenance-only diff --git a/apps/api/tests/unit/run/test_run_input_dataset_ids.py b/apps/api/tests/unit/run/test_run_input_dataset_ids.py new file mode 100644 index 00000000000..435a63e4d38 --- /dev/null +++ b/apps/api/tests/unit/run/test_run_input_dataset_ids.py @@ -0,0 +1,359 @@ +"""Unit tests for Run.input_dataset_ids (PROV `used` input Dataset refs). + +Mirrors the pinned_calibration_ids suite: the field flows from the +start_run decider through RunStarted, to_payload, from_stored, and the +evolver fold onto Run.input_dataset_ids. NO cross-BC existence check is +exercised here (id-only atomic refs, cross-BC eventual-consistency +stance); only set cardinality is validated. The start_run gate that +reads each input Dataset's Verified Distribution lands separately and +goes through the Data BC. 
+""" + +from datetime import UTC, datetime +from uuid import UUID, uuid4 + +import pytest + +from cora.equipment.aggregates.asset import ( + Asset, + AssetLifecycle, + AssetName, + AssetTier, +) +from cora.infrastructure.ports.clearance_lookup import ClearanceLookupResult +from cora.infrastructure.ports.event_store import StoredEvent +from cora.recipe.aggregates.plan import Plan, PlanName, PlanStatus +from cora.run.aggregates.run import ( + RUN_INPUT_DATASETS_MAX_ENTRIES, + InvalidInputDatasetsError, + fold, + validate_input_dataset_ids, +) +from cora.run.aggregates.run.events import RunStarted, from_stored, to_payload +from cora.run.features import start_run +from cora.run.features.start_run import RunStartContext, StartRun +from cora.subject.aggregates.subject import Subject, SubjectName, SubjectStatus + +_NOW = datetime(2026, 5, 11, 12, 0, 0, tzinfo=UTC) + + +def _active_clearance_stub() -> tuple[ClearanceLookupResult, ...]: + return ( + ClearanceLookupResult( + clearance_id=UUID(int=0), + status="Active", + template_id=UUID(int=1), + template_code="ESAF", + facility_code="aps", + ), + ) + + +def _plan(*, asset_ids: frozenset[UUID]) -> Plan: + return Plan( + id=uuid4(), + name=PlanName("Reconstruction"), + practice_id=uuid4(), + asset_ids=asset_ids, + status=PlanStatus.DEFINED, + ) + + +def _asset(*, asset_id: UUID, family_ids: frozenset[UUID]) -> Asset: + return Asset( + id=asset_id, + name=AssetName("ComputeNode"), + tier=AssetTier.DEVICE, + parent_id=uuid4(), + lifecycle=AssetLifecycle.ACTIVE, + family_ids=family_ids, + ) + + +def _subject() -> Subject: + return Subject( + id=uuid4(), + name=SubjectName("PorousCeramicSample-A"), + status=SubjectStatus.MOUNTED, + ) + + +def _context() -> tuple[RunStartContext, UUID, Subject]: + cap = uuid4() + asset_id = uuid4() + plan = _plan(asset_ids=frozenset({asset_id})) + asset = _asset(asset_id=asset_id, family_ids=frozenset({cap})) + subject = _subject() + context = RunStartContext( + plan=plan, + subject=subject, + 
assets={asset_id: asset}, + referencing_clearances=_active_clearance_stub(), + ) + return context, cap, subject + + +def _stored(event_type: str, payload: dict[str, object]) -> StoredEvent: + return StoredEvent( + position=1, + event_id=uuid4(), + stream_type="Run", + stream_id=uuid4(), # type: ignore[arg-type] + version=1, + event_type=event_type, + schema_version=1, + payload=payload, + correlation_id=uuid4(), + causation_id=None, + occurred_at=_NOW, + recorded_at=_NOW, + ) + + +# ---------- validate_input_dataset_ids ---------- + + +@pytest.mark.unit +def test_validate_input_dataset_ids_accepts_empty() -> None: + """Empty ref set is the default (Run with no input Datasets).""" + assert validate_input_dataset_ids(frozenset()) == frozenset() + + +@pytest.mark.unit +def test_validate_input_dataset_ids_accepts_within_cap() -> None: + """A reasonable-size ref set (under the cap) is accepted verbatim, + with no element-level existence check at this layer.""" + s = frozenset(uuid4() for _ in range(10)) + assert validate_input_dataset_ids(s) == s + + +@pytest.mark.unit +def test_validate_input_dataset_ids_accepts_exactly_at_cap() -> None: + """Boundary: exactly RUN_INPUT_DATASETS_MAX_ENTRIES is accepted + (off-by-one guard mirrors the pinned_calibration_ids boundary).""" + s = frozenset(uuid4() for _ in range(RUN_INPUT_DATASETS_MAX_ENTRIES)) + assert validate_input_dataset_ids(s) == s + + +@pytest.mark.unit +def test_validate_input_dataset_ids_rejects_over_cap() -> None: + """Cardinality cap rejects > RUN_INPUT_DATASETS_MAX_ENTRIES; raises + InvalidInputDatasetsError. 
Mirrors the pinned_calibration_ids cap + exactly (same precedent + same default cap of 64).""" + s = frozenset(uuid4() for _ in range(RUN_INPUT_DATASETS_MAX_ENTRIES + 1)) + with pytest.raises(InvalidInputDatasetsError): + validate_input_dataset_ids(s) + + +@pytest.mark.unit +def test_invalid_input_datasets_error_carries_count() -> None: + """The error class exposes `.count` for observability + debugging + (matches the pinned_calibration_ids error contract).""" + bad_count = RUN_INPUT_DATASETS_MAX_ENTRIES + 5 + err = InvalidInputDatasetsError(bad_count) + assert err.count == bad_count + assert str(bad_count) in str(err) + + +# ---------- event serialization ---------- + + +@pytest.mark.unit +def test_to_payload_serializes_run_started_with_input_dataset_ids_sorted() -> None: + """The wire form is a list sorted lexicographically for deterministic + byte ordering (the in-memory frozenset has no order).""" + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + ds_b = UUID("01900000-0000-7000-8000-0000000d5002") + ds_c = UUID("01900000-0000-7000-8000-0000000d5003") + event = RunStarted( + run_id=uuid4(), + name="Reconstruction consuming three input Datasets", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_c, ds_a, ds_b), + ) + payload = to_payload(event) + assert payload["input_dataset_ids"] == sorted([str(ds_a), str(ds_b), str(ds_c)]) + + +@pytest.mark.unit +def test_to_payload_always_renders_input_dataset_ids_key_when_empty() -> None: + """Run has no content-hash so the key is rendered unconditionally; + an empty input set serializes as `[]`, not an omitted key.""" + event = RunStarted( + run_id=uuid4(), + name="Run without input Datasets", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + ) + payload = to_payload(event) + assert payload["input_dataset_ids"] == [] + + +@pytest.mark.unit +def test_from_stored_rebuilds_run_started_without_input_dataset_ids_key_as_empty() -> None: + """Forward-compat: legacy RunStarted 
payloads have no + input_dataset_ids key. from_stored returns an empty tuple via + `payload.get(..., [])`.""" + stored = _stored( + "RunStarted", + { + "run_id": str(uuid4()), + "name": "Legacy run", + "plan_id": str(uuid4()), + "subject_id": None, + "occurred_at": _NOW.isoformat(), + # NOTE: no "input_dataset_ids" key, legacy shape. + }, + ) + event = from_stored(stored) + assert isinstance(event, RunStarted) + assert event.input_dataset_ids == () + + +@pytest.mark.unit +def test_run_started_input_dataset_ids_round_trip() -> None: + """RunStarted with input_dataset_ids round-trips through to_payload + + from_stored. The event class holds them as a tuple; to_payload sorts + before serialise so already-sorted input round-trips trivially.""" + ds_a = UUID("01900000-0000-7000-8000-0000000d5001") + ds_b = UUID("01900000-0000-7000-8000-0000000d5002") + original = RunStarted( + run_id=uuid4(), + name="Run with input Datasets", + plan_id=uuid4(), + subject_id=None, + occurred_at=_NOW, + input_dataset_ids=(ds_a, ds_b), + ) + stored = _stored("RunStarted", to_payload(original)) + assert from_stored(stored) == original + + +# ---------- end-to-end fold ---------- + + +@pytest.mark.unit +def test_input_dataset_ids_flow_decider_to_run_state_as_frozenset() -> None: + """End-to-end: decider -> RunStarted -> to_payload -> from_stored -> + fold -> Run.input_dataset_ids as a frozenset (in-memory equality).""" + context, cap, subject = _context() + ds_a = uuid4() + ds_b = uuid4() + new_id = uuid4() + decision = start_run.decide( + state=None, + command=StartRun( + name="Reconstruction", + plan_id=context.plan.id, + subject_id=subject.id, + input_dataset_ids=frozenset({ds_a, ds_b}), + ), + context=context, + needed_family_ids_snapshot=frozenset({cap}), + effective_parameters={}, + method_parameters_schema=None, + now=_NOW, + new_id=new_id, + ) + event = decision.run_events[0] + rebuilt = from_stored(_stored("RunStarted", to_payload(event))) + state = fold([rebuilt]) + assert state is 
not None + assert state.input_dataset_ids == frozenset({ds_a, ds_b}) + + +# ---------- decider threading + cardinality ---------- + + +@pytest.mark.unit +def test_decide_defaults_input_dataset_ids_to_empty_when_omitted() -> None: + """Ref set defaults to empty frozenset; emitted event payload is `()`.""" + context, cap, subject = _context() + decision = start_run.decide( + state=None, + command=StartRun(name="Run", plan_id=context.plan.id, subject_id=subject.id), + context=context, + needed_family_ids_snapshot=frozenset({cap}), + effective_parameters={}, + method_parameters_schema=None, + now=_NOW, + new_id=uuid4(), + ) + assert decision.run_events[0].input_dataset_ids == () + + +@pytest.mark.unit +def test_decide_threads_input_dataset_ids_sorted_through_to_event() -> None: + """The decider sorts the operator-supplied frozenset before emit so + the event payload has deterministic bytes.""" + context, cap, subject = _context() + ds_a = uuid4() + ds_b = uuid4() + ds_c = uuid4() + decision = start_run.decide( + state=None, + command=StartRun( + name="Reconstruction", + plan_id=context.plan.id, + subject_id=subject.id, + input_dataset_ids=frozenset({ds_c, ds_a, ds_b}), + ), + context=context, + needed_family_ids_snapshot=frozenset({cap}), + effective_parameters={}, + method_parameters_schema=None, + now=_NOW, + new_id=uuid4(), + ) + assert decision.run_events[0].input_dataset_ids == tuple(sorted([ds_a, ds_b, ds_c])) + + +@pytest.mark.unit +def test_decide_rejects_input_dataset_ids_over_cap() -> None: + """Cardinality cap on the input Dataset ref set. 
Symmetric to the + pinned_calibration_ids decider rejecting > 64 entries.""" + context, cap, subject = _context() + too_many = frozenset(uuid4() for _ in range(RUN_INPUT_DATASETS_MAX_ENTRIES + 1)) + with pytest.raises(InvalidInputDatasetsError): + start_run.decide( + state=None, + command=StartRun( + name="Too many input Datasets", + plan_id=context.plan.id, + subject_id=subject.id, + input_dataset_ids=too_many, + ), + context=context, + needed_family_ids_snapshot=frozenset({cap}), + effective_parameters={}, + method_parameters_schema=None, + now=_NOW, + new_id=uuid4(), + ) + + +@pytest.mark.unit +def test_decide_accepts_input_dataset_ids_exactly_at_cap() -> None: + """Boundary guard: exactly at the cap is accepted (off-by-one mirror + of the pinned_calibration_ids boundary test).""" + context, cap, subject = _context() + at_cap = frozenset(uuid4() for _ in range(RUN_INPUT_DATASETS_MAX_ENTRIES)) + decision = start_run.decide( + state=None, + command=StartRun( + name="Cap input Datasets", + plan_id=context.plan.id, + subject_id=subject.id, + input_dataset_ids=at_cap, + ), + context=context, + needed_family_ids_snapshot=frozenset({cap}), + effective_parameters={}, + method_parameters_schema=None, + now=_NOW, + new_id=uuid4(), + ) + assert len(decision.run_events[0].input_dataset_ids) == RUN_INPUT_DATASETS_MAX_ENTRIES