Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 205 additions & 0 deletions adapter/encryption_admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,211 @@ func freshCutoverResponse(sc *encryption.Sidecar, proposedIdx uint64, fanoutResu
}
}

// raftEnvelopeWrapEnabled gates the EnableRaftEnvelope RPC until
// the Stage 6E-2 wrap-on-propose / unwrap-on-apply / §7.1
// proposal-quiescence-barrier triple ships. With only 6E-1
// (admin RPC + FSM-apply machinery) deployed, accepting a cutover
// proposal records RaftEnvelopeCutoverIndex=N in the sidecar, but
// no entry between N and the eventual 6E-2 upgrade is wrapped —
// they all remain cleartext. The future engine apply-hook
// (designed in §6.3) dispatches `entry.Index > cutover` through
// the unwrap path, so a 6E-2 upgrade against a sidecar where N
// was recorded under a 6E-1-only build would treat every
// pre-upgrade cleartext entry above N as an envelope and halt
// apply cluster-wide. Gate fails closed until 6E-2 atomically
// flips this to true alongside the wrap/unwrap/barrier wiring.
//
// While this is false, EnableRaftEnvelope returns
// FailedPrecondition after its precheck and before the capability
// fan-out and the Raft propose.
const raftEnvelopeWrapEnabled = false

// EnableRaftEnvelope is the Stage 6E Phase 2 cutover — flips Raft
// proposals from cleartext to §4.2-envelope. Structural mirror of
// EnableStorageEnvelope; the differences are:
//
// - Target Purpose is PurposeRaft.
// - Source DEK slot is sidecar.Active.Raft (not Active.Storage).
// - The "already active" sentinel is the single field
// sidecar.RaftEnvelopeCutoverIndex != 0 — there is no separate
// bool flag, so the raft variant has no equivalent of the
// §6.4 cutover_index_unknown defensive fallback (a zero index
// is exactly the not-active state, not a corrupted-active
// state, and the 6E-1a applier fail-closes on raftIdx == 0
// before ApplyRegistration).
//
// The semaphore, pre-check / fan-out / propose / post-check
// sequence, and error mapping match the storage variant verbatim;
// see EnableStorageEnvelope for the full design rationale.
//
// **Gated**: refuses with FailedPrecondition until 6E-2 ships the
// wrap-on-propose / unwrap-on-apply / §7.1 barrier (see
// raftEnvelopeWrapEnabled for the rationale). Everything that runs
// before the gate still fires so operators get fast feedback on
// wiring problems: the semaphore acquire and the whole of
// raftCutoverPrecheck (leader gate, proposer / sidecar-path wiring,
// request shape, bootstrap gate, idempotent-retry short-circuit).
// Consequently a sidecar that already carries a non-zero
// RaftEnvelopeCutoverIndex still yields the idempotent OK response
// while gated; only a fresh cutover is refused, and in that case no
// Raft proposal is composed and no sidecar mutation occurs.
func (s *EncryptionAdminServer) EnableRaftEnvelope(ctx context.Context, req *pb.EnableRaftEnvelopeRequest) (*pb.EnableRaftEnvelopeResponse, error) {
// Hold the cutover semaphore for the rest of the call; the
// deferred release runs on every return path below.
if err := s.acquireCutoverSemaphore(ctx); err != nil {
return nil, err
}
defer s.releaseCutoverSemaphore()
// The precheck either refuses (err), short-circuits with the
// idempotent-retry response (earlyResp), or hands back the
// sidecar snapshot to propose against (preSidecar).
preSidecar, earlyResp, err := s.raftCutoverPrecheck(ctx, req)
if err != nil {
return nil, err
}
if earlyResp != nil {
return earlyResp, nil
}
if !raftEnvelopeWrapEnabled {
// Without 6E-2 wrap-on-propose, recording the cutover
// index here would let cleartext entries land at indexes
// > N. A 6E-2 upgrade would then treat those cleartext
// entries as envelopes and halt apply. Refuse before the
// fan-out and propose so no sidecar state changes.
return nil, grpcStatusError(codes.FailedPrecondition,
"encryption: enable-raft-envelope is gated until Stage 6E-2 ships wrap-on-propose / unwrap-on-apply / §7.1 proposal-quiescence-barrier; accepting the cutover now would brick the cluster on the next upgrade")
}
fanoutResult, err := s.runCutoverFanout(ctx)
if err != nil {
return nil, err
}
proposedIdx, err := s.proposeRaftCutoverEntry(ctx, preSidecar, req)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Gate raft cutover until wrapping support ships

When this RPC is invoked before the 6E-2 wrap/unwrap/barrier path is active, it persists RaftEnvelopeCutoverIndex=N while all subsequent Raft entries are still proposed in cleartext. The planned apply hook keys off entry.Index > sidecar.RaftEnvelopeCutoverIndex, so after upgrading/restarting with 6E-2 any cleartext entries committed at indexes greater than N will be treated as envelopes and replay/apply can halt. The marker should not be proposed until wrapping is active, or the activation scheme must ensure no cleartext entries can exist above the recorded cutover index.

Useful? React with 👍 / 👎.

if err != nil {
return nil, err
}
// Re-read the sidecar to classify what the applied entry did.
return s.raftCutoverPostcheck(proposedIdx, fanoutResult)
}

// raftCutoverPrecheck performs every check EnableRaftEnvelope needs
// before it may propose (Stage 6E §3.2 steps 1-5): leader gate,
// node-wiring checks, request validation, bootstrap gate, and the
// idempotent-retry short-circuit. Exactly one of the three results
// is meaningful:
//
//   - (sidecar, nil, nil): all gates passed; the caller continues on
//     the propose path with the sidecar snapshot read here.
//   - (nil, resp, nil): the cutover is already recorded; resp is the
//     idempotent-retry response and is returned to the client as-is.
//   - (nil, nil, err): the call is refused with err.
func (s *EncryptionAdminServer) raftCutoverPrecheck(ctx context.Context, req *pb.EnableRaftEnvelopeRequest) (*encryption.Sidecar, *pb.EnableRaftEnvelopeResponse, error) {
	leaderErr := s.requireLeader(ctx)
	if leaderErr != nil {
		return nil, nil, leaderErr
	}

	// Node wiring: both a proposer and a sidecar path are required.
	switch {
	case s.proposer == nil:
		return nil, nil, grpcStatusError(codes.FailedPrecondition, "encryption: proposer is not configured on this node")
	case s.sidecarPath == "":
		return nil, nil, grpcStatusError(codes.FailedPrecondition, "encryption: sidecar path is not configured on this node")
	}

	if reqErr := validateEnableRaftEnvelopeRequest(req); reqErr != nil {
		return nil, nil, reqErr
	}

	current, readErr := encryption.ReadSidecar(s.sidecarPath)
	if readErr != nil {
		return nil, nil, statusFromSidecarErr(readErr)
	}

	switch {
	case current.Active.Raft == 0:
		// No active Raft DEK means bootstrap has not committed.
		return nil, nil, grpcStatusError(codes.FailedPrecondition,
			"encryption: cluster not bootstrapped (Active.Raft == 0) — call BootstrapEncryption first")
	case current.RaftEnvelopeCutoverIndex != 0:
		// Idempotent retry: answer OK with was_already_active=true
		// and the original cutover index. The fan-out is skipped
		// because the original cutover already passed the gate.
		return nil, idempotentRaftCutoverResponse(current), nil
	}
	return current, nil, nil
}

// proposeRaftCutoverEntry builds the §2.1 RotationPayload for the
// raft cutover and submits it through Raft as an OpRotation entry,
// returning whatever proposeEncryptionEntry returns.
//
// The payload carries SubTag=RotateSubEnableRaftEnvelope,
// Purpose=PurposeRaft and DEKID=preSidecar.Active.Raft; the proposer
// registration reuses that DEK ID together with the node ID and
// local epoch taken from req. Wrapped is an empty, non-nil slice
// (the 6E-1a applier rejects on length, not on nil-ness).
func (s *EncryptionAdminServer) proposeRaftCutoverEntry(ctx context.Context, preSidecar *encryption.Sidecar, req *pb.EnableRaftEnvelopeRequest) (uint64, error) {
	activeRaftDEK := preSidecar.Active.Raft

	registration := fsmwire.RegistrationPayload{
		DEKID:      activeRaftDEK,
		FullNodeID: req.GetProposerNodeId(),
		LocalEpoch: uint32ToLocalEpoch(req.GetProposerLocalEpoch()),
	}

	encoded := fsmwire.EncodeRotation(fsmwire.RotationPayload{
		SubTag:               fsmwire.RotateSubEnableRaftEnvelope,
		DEKID:                activeRaftDEK,
		Purpose:              fsmwire.PurposeRaft,
		Wrapped:              []byte{},
		ProposerRegistration: registration,
	})

	return s.proposeEncryptionEntry(ctx, fsmwire.OpRotation, encoded)
}

// raftCutoverPostcheck reads the sidecar again once the Raft propose
// has returned and maps the §2.1 outcomes onto a response:
//
//   - Fresh success: RaftEnvelopeCutoverIndex == proposedIdx, the
//     §3.2 happy path.
//   - Concurrent overlap: RaftEnvelopeCutoverIndex is non-zero but
//     differs from proposedIdx because another cutover landed first
//     (operator-impossible under the semaphore). The applier keeps
//     the FIRST cutover's index, and that index is surfaced with
//     was_already_active=false since this call's own entry did
//     commit and was treated by the applier as the idempotent path.
//   - Stale-DEKID race: RaftEnvelopeCutoverIndex is still 0 because
//     a RotateDEK raced and the applier consumed the entry as a
//     benign no-op; reported as FailedPrecondition with a retry hint.
//
// The first two outcomes share one code path: any non-zero index
// produces the fresh-success response built from the re-read sidecar.
func (s *EncryptionAdminServer) raftCutoverPostcheck(proposedIdx uint64, fanoutResult admin.CapabilityFanoutResult) (*pb.EnableRaftEnvelopeResponse, error) {
	reread, readErr := encryption.ReadSidecar(s.sidecarPath)
	if readErr != nil {
		return nil, statusFromSidecarErr(readErr)
	}

	if reread.RaftEnvelopeCutoverIndex != 0 {
		return freshRaftCutoverResponse(reread, proposedIdx, fanoutResult), nil
	}

	return nil, grpcStatusError(codes.FailedPrecondition,
		"encryption: cutover proposal raced a RotateDEK (sidecar.Active.Raft moved); retry against the new active DEK")
}

// validateEnableRaftEnvelopeRequest applies the §3.2 step 1 checks
// at the gRPC boundary and returns InvalidArgument when either one
// fails, nil otherwise:
//
//   - proposer_node_id must be non-zero;
//   - proposer_local_epoch must fit in 16 bits.
//
// It is a standalone function so the EnableRaftEnvelope orchestration
// stays within its cyclomatic-complexity budget and so tests can
// exercise the validation in isolation.
func validateEnableRaftEnvelopeRequest(req *pb.EnableRaftEnvelopeRequest) error {
	nodeID, localEpoch := req.GetProposerNodeId(), req.GetProposerLocalEpoch()

	switch {
	case nodeID == 0:
		return grpcStatusError(codes.InvalidArgument,
			"encryption: proposer_node_id must be non-zero (0 is reserved as the §6.1 not-capable sentinel)")
	case localEpoch > math.MaxUint16:
		return grpcStatusErrorf(codes.InvalidArgument,
			"encryption: proposer_local_epoch=%d exceeds the §4.1 16-bit bound (max 0xFFFF)",
			localEpoch)
	}
	return nil
}

// idempotentRaftCutoverResponse builds the §3.2 step 5 retry-success
// response for the raft variant: was_already_active=true,
// applied_index set to sidecar.RaftEnvelopeCutoverIndex (the apply
// index of the original cutover), and no capability summary.
//
// Unlike the storage variant there is no cutover_index_unknown
// branch: the raft variant uses the cutover index itself as the
// active sentinel, so the "active but unknown index" state that
// branch guards against cannot occur alongside a non-zero index.
func idempotentRaftCutoverResponse(sc *encryption.Sidecar) *pb.EnableRaftEnvelopeResponse {
	resp := new(pb.EnableRaftEnvelopeResponse)
	resp.WasAlreadyActive = true
	resp.AppliedIndex = sc.RaftEnvelopeCutoverIndex
	// CapabilitySummary stays nil: retries do not run the fan-out.
	return resp
}

// freshRaftCutoverResponse builds the §3.2 fresh-success response
// for the raft variant: was_already_active=false, the capability
// summary projected from the fan-out verdicts, and applied_index
// taken from the post-apply sidecar's RaftEnvelopeCutoverIndex.
//
// The second parameter (the proposed index) is accepted but unused.
// raftCutoverPostcheck only calls this after confirming the cutover
// index is non-zero, so the storage variant's `appliedIndex == 0`
// defensive branch has no counterpart here: with the index doubling
// as the active sentinel, a zero at this point would be an upstream
// invariant violation rather than a hand-edit hazard.
func freshRaftCutoverResponse(sc *encryption.Sidecar, _ uint64, fanoutResult admin.CapabilityFanoutResult) *pb.EnableRaftEnvelopeResponse {
	cutoverIdx := sc.RaftEnvelopeCutoverIndex
	summary := projectCapabilityVerdicts(fanoutResult.Verdicts)

	resp := &pb.EnableRaftEnvelopeResponse{WasAlreadyActive: false}
	resp.AppliedIndex = cutoverIdx
	resp.CapabilitySummary = summary
	return resp
}

// projectCapabilityVerdicts marshals the internal CapabilityVerdict
// shape into the wire-format proto.CapabilityVerdict. Reachable /
// Err fields are intentionally NOT projected: the cutover RPC only
Expand Down
174 changes: 174 additions & 0 deletions adapter/encryption_admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1891,3 +1891,177 @@ func assertProtoVerdict(t *testing.T, v *pb.CapabilityVerdict, wantNodeID uint64
v, wantNodeID, wantBuildSHA)
}
}

// validEnableRaftEnvelopeRequest returns a fresh, well-formed
// request shared by the raft-variant tests (node ID 11, local epoch
// 7). It carries the same proposer identity fields as the storage
// variant's request; the two Go types are independent but
// structurally identical (proposer_node_id, proposer_local_epoch).
func validEnableRaftEnvelopeRequest() *pb.EnableRaftEnvelopeRequest {
	req := new(pb.EnableRaftEnvelopeRequest)
	req.ProposerNodeId = 11
	req.ProposerLocalEpoch = 7
	return req
}

// applyRaftCutover simulates the fresh-success apply effect of the
// raft cutover (the §6.4 equivalent) on a sidecar: it stamps
// RaftEnvelopeCutoverIndex with raftIdx and raises RaftAppliedIndex
// to raftIdx when it is currently lower. Meant as the apply callback
// for an EnableRaftEnvelope happy-path test, so the post-propose
// sidecar re-read lands in the fresh-success branch.
func applyRaftCutover(sc *encryption.Sidecar, raftIdx uint64) {
	sc.RaftEnvelopeCutoverIndex = raftIdx
	if sc.RaftAppliedIndex >= raftIdx {
		// Applied index is already at or past this entry.
		return
	}
	sc.RaftAppliedIndex = raftIdx
}

// TestEncryptionAdmin_EnableRaftEnvelope_RejectsZeroProposerNodeID
// pins the §6.1 sentinel rejection at the gRPC boundary.
func TestEncryptionAdmin_EnableRaftEnvelope_RejectsZeroProposerNodeID(t *testing.T) {
t.Parallel()
srv := NewEncryptionAdminServer(
WithEncryptionAdminProposer(&recordingProposer{}),
WithEncryptionAdminLeaderView(stubLeaderView{state: raftengine.StateLeader}),
WithEncryptionAdminSidecarPath(cutoverReadySidecarFixture(t)),
)
req := validEnableRaftEnvelopeRequest()
req.ProposerNodeId = 0
_, err := srv.EnableRaftEnvelope(context.Background(), req)
if status.Code(err) != codes.InvalidArgument {
t.Errorf("EnableRaftEnvelope status=%v, want InvalidArgument", status.Code(err))
}
}

// TestEncryptionAdmin_EnableRaftEnvelope_RejectsOversizedLocalEpoch
// pins the §4.1 16-bit bound at the gRPC boundary.
func TestEncryptionAdmin_EnableRaftEnvelope_RejectsOversizedLocalEpoch(t *testing.T) {
t.Parallel()
srv := NewEncryptionAdminServer(
WithEncryptionAdminProposer(&recordingProposer{}),
WithEncryptionAdminLeaderView(stubLeaderView{state: raftengine.StateLeader}),
WithEncryptionAdminSidecarPath(cutoverReadySidecarFixture(t)),
)
req := validEnableRaftEnvelopeRequest()
req.ProposerLocalEpoch = math.MaxUint16 + 1
_, err := srv.EnableRaftEnvelope(context.Background(), req)
if status.Code(err) != codes.InvalidArgument {
t.Errorf("EnableRaftEnvelope status=%v, want InvalidArgument", status.Code(err))
}
}

// TestEncryptionAdmin_EnableRaftEnvelope_RejectsNotBootstrapped pins
// the raft-variant bootstrap gate: Active.Raft == 0 means
// BootstrapEncryption has not committed yet, so the cutover must
// refuse.
func TestEncryptionAdmin_EnableRaftEnvelope_RejectsNotBootstrapped(t *testing.T) {
t.Parallel()
path := writeSidecarFixture(t, &encryption.Sidecar{
Active: encryption.ActiveKeys{Storage: 0, Raft: 0},
Keys: map[string]encryption.SidecarKey{},
})
srv := NewEncryptionAdminServer(
WithEncryptionAdminProposer(&recordingProposer{}),
WithEncryptionAdminLeaderView(stubLeaderView{state: raftengine.StateLeader}),
WithEncryptionAdminSidecarPath(path),
)
_, err := srv.EnableRaftEnvelope(context.Background(), validEnableRaftEnvelopeRequest())
if status.Code(err) != codes.FailedPrecondition {
t.Errorf("EnableRaftEnvelope status=%v, want FailedPrecondition", status.Code(err))
}
if err == nil || !strings.Contains(err.Error(), "BootstrapEncryption") {
t.Errorf("error %q does not hint at BootstrapEncryption", err)
}
}

// TestEncryptionAdmin_EnableRaftEnvelope_IdempotentRetry pins the
// retry path: a duplicate call against a sidecar with
// RaftEnvelopeCutoverIndex != 0 returns OK with
// was_already_active=true and applied_index = the original
// cutover index. No fan-out, no propose. The raft variant has no
// CutoverIndexUnknown field, so the response is intentionally
// narrower than the storage variant's idempotent shape.
func TestEncryptionAdmin_EnableRaftEnvelope_IdempotentRetry(t *testing.T) {
t.Parallel()
path := writeSidecarFixture(t, &encryption.Sidecar{
Active: encryption.ActiveKeys{Storage: 5, Raft: 6},
Keys: map[string]encryption.SidecarKey{"5": {Purpose: encryption.SidecarPurposeStorage, Wrapped: []byte("ws"), Created: "x", LocalEpoch: 0}, "6": {Purpose: encryption.SidecarPurposeRaft, Wrapped: []byte("wr"), Created: "x", LocalEpoch: 0}},
RaftEnvelopeCutoverIndex: 777,
RaftAppliedIndex: 900,
})
proposer := &recordingProposer{}
srv := NewEncryptionAdminServer(
WithEncryptionAdminProposer(proposer),
WithEncryptionAdminLeaderView(stubLeaderView{state: raftengine.StateLeader}),
WithEncryptionAdminSidecarPath(path),
WithEncryptionAdminCapabilityFanout(failOnCallCapabilityFanout(t)),
)
got, err := srv.EnableRaftEnvelope(context.Background(), validEnableRaftEnvelopeRequest())
if err != nil {
t.Fatalf("EnableRaftEnvelope: %v", err)
}
if !got.WasAlreadyActive {
t.Error("WasAlreadyActive=false, want true (idempotent retry)")
}
if got.AppliedIndex != 777 {
t.Errorf("AppliedIndex=%d, want 777 (original RaftEnvelopeCutoverIndex)", got.AppliedIndex)
}
if len(got.CapabilitySummary) != 0 {
t.Errorf("CapabilitySummary len=%d, want 0 (empty on idempotent retries)", len(got.CapabilitySummary))
}
if len(proposer.calls) != 0 {
t.Errorf("proposer.calls len=%d, want 0 (no propose on idempotent retry)", len(proposer.calls))
}
}

// TestEncryptionAdmin_EnableRaftEnvelope_GatedUntil6E2 covers the
// fail-closed gate. While raftEnvelopeWrapEnabled is false (before
// Stage 6E-2 ships wrap-on-propose / unwrap-on-apply / §7.1
// barrier), a fresh cutover MUST be refused with FailedPrecondition
// instead of recording RaftEnvelopeCutoverIndex=N. Recording N now
// would let cleartext entries land at indexes > N, and the 6E-2
// engine apply-hook would treat them as envelopes on upgrade,
// halting apply cluster-wide.
//
// The server is wired with an `applyingProposer`, exactly as a
// future happy-path test would be, so the assertions show the
// refusal happens BEFORE any proposal is composed (proposer.calls
// stays empty) and BEFORE any sidecar mutation
// (RaftEnvelopeCutoverIndex stays 0). Once 6E-2 flips
// raftEnvelopeWrapEnabled to true, this test becomes the regression
// pin for the gate-flip and gains a HappyPath sibling.
func TestEncryptionAdmin_EnableRaftEnvelope_GatedUntil6E2(t *testing.T) {
	t.Parallel()

	sidecarPath := cutoverReadySidecarFixture(t)
	applier := &applyingProposer{
		recordingProposer: recordingProposer{commitIndex: 4242},
		sidecarPath:       sidecarPath,
		applyFn:           applyRaftCutover,
	}
	server := NewEncryptionAdminServer(
		WithEncryptionAdminProposer(applier),
		WithEncryptionAdminLeaderView(stubLeaderView{state: raftengine.StateLeader}),
		WithEncryptionAdminSidecarPath(sidecarPath),
		WithEncryptionAdminCapabilityFanout(fixedCapabilityFanout(allOKFanoutResult(), nil)),
	)

	_, err := server.EnableRaftEnvelope(context.Background(), validEnableRaftEnvelopeRequest())
	if got := status.Code(err); got != codes.FailedPrecondition {
		t.Errorf("EnableRaftEnvelope status=%v, want FailedPrecondition (gated until 6E-2)", got)
	}

	mentionsGate := err != nil && strings.Contains(err.Error(), "6E-2")
	if !mentionsGate {
		t.Errorf("error %q does not hint at the 6E-2 gate", err)
	}

	if n := len(applier.calls); n != 0 {
		t.Errorf("proposer.calls len=%d, want 0 (gate must refuse before propose)", n)
	}

	// The sidecar must be untouched: a RaftEnvelopeCutoverIndex of
	// 0 means the cluster has not entered Phase 2, so a later 6E-2
	// upgrade is still safe.
	after, readErr := encryption.ReadSidecar(sidecarPath)
	if readErr != nil {
		t.Fatalf("ReadSidecar: %v", readErr)
	}
	if idx := after.RaftEnvelopeCutoverIndex; idx != 0 {
		t.Errorf("RaftEnvelopeCutoverIndex=%d, want 0 (gate must refuse before sidecar mutation)", idx)
	}
}
Loading
Loading