From bef54ed64bf623a698fade6bc22ff46d7c5070fb Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 28 May 2026 20:59:39 +0200 Subject: [PATCH 01/21] direct: replace WaitAfterCreate/WaitAfterUpdate with Engine callback for intermediate state saves Resources with multi-step deployments (apps, clusters, model serving endpoints, etc.) previously split their logic between DoCreate/DoUpdate and WaitAfterCreate/WaitAfterUpdate at an arbitrary point, making it hard to persist state incrementally. Replace the WaitAfterXxx methods with an *Engine parameter on DoCreate and DoUpdate. Engine.SetID + Engine.SaveState can be called immediately after the initial API call succeeds, before any long-running wait, so the resource is tracked in state even if deployment is interrupted mid-wait (preventing orphaned resources). Simple resources pass _ *Engine and are unaffected. Complex resources (apps, clusters, database instances, model serving endpoints, vector search endpoints and indexes) now inline their wait logic and call engine.SetID/SaveState at the appropriate point. Co-authored-by: Denis Bilenko --- bundle/direct/apply.go | 51 +++------- bundle/direct/dresources/README.md | 4 +- bundle/direct/dresources/adapter.go | 94 +++---------------- bundle/direct/dresources/alert.go | 4 +- bundle/direct/dresources/all_test.go | 19 +--- bundle/direct/dresources/app.go | 33 ++++--- bundle/direct/dresources/app_test.go | 36 +++++-- bundle/direct/dresources/catalog.go | 4 +- bundle/direct/dresources/cluster.go | 76 ++++++++------- bundle/direct/dresources/dashboard.go | 4 +- bundle/direct/dresources/database_catalog.go | 4 +- bundle/direct/dresources/database_instance.go | 36 +++---- bundle/direct/dresources/engine.go | 47 ++++++++++ bundle/direct/dresources/experiment.go | 4 +- bundle/direct/dresources/external_location.go | 4 +- bundle/direct/dresources/grants.go | 6 +- bundle/direct/dresources/job.go | 4 +- bundle/direct/dresources/model.go | 4 +- .../dresources/model_serving_endpoint.go | 25 ++--- bundle/direct/dresources/permissions.go | 6 +- bundle/direct/dresources/pipeline.go | 4 +- bundle/direct/dresources/postgres_branch.go | 4 +- bundle/direct/dresources/postgres_catalog.go | 2 +- bundle/direct/dresources/postgres_endpoint.go | 4 +- bundle/direct/dresources/postgres_project.go | 4 +- .../dresources/postgres_synced_table.go | 2 +- bundle/direct/dresources/quality_monitor.go | 4 +- bundle/direct/dresources/registered_model.go | 4 +- bundle/direct/dresources/schema.go | 4 +- bundle/direct/dresources/schema_test.go | 6 +- bundle/direct/dresources/secret_scope.go | 2 +- bundle/direct/dresources/secret_scope_acls.go | 4 +- bundle/direct/dresources/sql_warehouse.go | 4 +- .../dresources/synced_database_table.go | 4 +- .../dresources/vector_search_endpoint.go | 20 ++-- .../direct/dresources/vector_search_index.go | 44 ++++----- bundle/direct/dresources/volume.go | 4 +- 37 files changed, 274 insertions(+), 311 deletions(-) create mode 100644 bundle/direct/dresources/engine.go diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index a4a61f727f2..b9db11fa6fa 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -51,6 +51,10 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { + engine := dresources.NewEngine(d.Adapter.StateType(), func(id string, x any) error { + return db.SaveState(d.ResourceKey, id, x, d.DependsOn) + }) + var newID string var remoteState any _, err := retryWith(ctx, func(err error) bool { @@ -59,7 +63,7 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return ok && isTransient(ctx, err) }, func() (struct{}, error) { var e error - newID, remoteState, e = d.Adapter.DoCreate(ctx, newState) + newID, remoteState, e = d.Adapter.DoCreate(ctx, engine, newState) return struct{}{}, e }) err = dresources.UnwrapRetrySafe(err) @@ -80,18 +84,6 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("saving state after creating id=%s: %w", newID, err) } - waitRemoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.WaitAfterCreate(ctx, newID, newState) - }) - if err != nil { - return fmt.Errorf("waiting after creating id=%s: %w", newID, err) - } - - err = d.SetRemoteState(waitRemoteState) - if err != nil { - return err - } - return nil } @@ -134,8 +126,13 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("internal error: DoUpdate not implemented for resource %s", d.ResourceKey) } + engine := dresources.NewEngine(d.Adapter.StateType(), func(_ string, x any) error { + return db.SaveState(d.ResourceKey, id, x, d.DependsOn) + }) + engine.SetID(id) + remoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.DoUpdate(ctx, id, newState, planEntry) + return d.Adapter.DoUpdate(ctx, engine, id, newState, planEntry) }) if err != nil { return fmt.Errorf("updating id=%s: %w", id, err) @@ -151,19 +148,6 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("saving state id=%s: %w", id, err) } - waitRemoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.WaitAfterUpdate(ctx, id, newState) - }) - if err != nil { - return fmt.Errorf("waiting after updating id=%s: %w", id, err) - } - - // Update remote state with the result from wait operation - err = d.SetRemoteState(waitRemoteState) - if err != nil { - return err - } - return nil } @@ -195,19 +179,6 @@ func (d *DeploymentUnit) UpdateWithID(ctx context.Context, db *dstate.Deployment return fmt.Errorf("saving state id=%s: %w", oldID, err) } - waitRemoteState, err := retryOnTransient(ctx, func() (any, error) { - return d.Adapter.WaitAfterUpdate(ctx, newID, newState) - }) - if err != nil { - return fmt.Errorf("waiting after updating id=%s: %w", newID, err) - } - - // Update remote state with the result from wait operation - err = d.SetRemoteState(waitRemoteState) - if err != nil { - return err - } - return nil } diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index a20b68c4ebd..b9c6e4754e2 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -34,9 +34,9 @@ Do **not** derive update mask field names from `entry.Changes`. The paths in `en If a resource has fields that must not be sent in updates (deploy-only, lifecycle-only, etc.), document them explicitly with a `var` block and a comment explaining each exclusion. -## Async APIs: WaitAfterCreate / WaitAfterUpdate +## Async APIs -For resources whose create or update is asynchronous (the resource is not immediately ready after the call returns), implement `WaitAfterCreate` and/or `WaitAfterUpdate` instead of polling inline inside DoCreate/DoUpdate. These are the correct extension points in the framework, and polling inline bypasses state persistence timing. +For resources whose create or update is asynchronous, poll inline inside `DoCreate`/`DoUpdate` after the initial API call. To prevent orphaning if deployment is interrupted during a long wait, call `engine.SetID(id)` then `engine.SaveState(config)` immediately after the resource is created and before any waiting. The framework provides a `*Engine` as the second argument to both methods. ## Slice ordering: KeyedSlices diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 6891632b626..a29a094cc78 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -53,13 +53,14 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) - DoCreate(ctx context.Context, newState any) (id string, remoteState any, e error) + // Call engine.SetID then engine.SaveState to persist intermediate state before long-running waits. + // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) + DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) // [Optional] DoUpdate updates the resource. ID must not change as a result of this operation. Returns optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Example: func (r *ResourceSchema) DoUpdate(ctx context.Context, id string, newState *catalog.CreateSchema, entry *PlanEntry) (*catalog.SchemaInfo, error) - DoUpdate(ctx context.Context, id string, newState any, entry *PlanEntry) (remoteState any, e error) + // Example: func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *Engine, id string, newState *catalog.CreateSchema, entry *PlanEntry) (*catalog.SchemaInfo, error) + DoUpdate(ctx context.Context, engine *Engine, id string, newState any, entry *PlanEntry) (remoteState any, e error) // [Optional] DoUpdateWithID performs an update that may result in resource having a new ID. Returns new id and optionally remote state. DoUpdateWithID(ctx context.Context, id string, newState any) (newID string, remoteState any, e error) @@ -67,13 +68,6 @@ type IResource interface { // [Optional] DoResize resizes the resource. Only supported by clusters DoResize(ctx context.Context, id string, newState any) error - // [Optional] WaitAfterCreate waits for the resource to become ready after creation. Returns optionally updated remote state. - // TODO: wait status should be persisted in the state. - WaitAfterCreate(ctx context.Context, id string, newState any) (remoteState any, e error) - - // [Optional] WaitAfterUpdate waits for the resource to become ready after update. Returns optionally updated remote state. - WaitAfterUpdate(ctx context.Context, id string, newState any) (remoteState any, e error) - // [Optional] WaitAfterDelete waits for the resource to be fully removed after DoDelete returns. // Useful for backends with asynchronous deletion: a follow-up create on the same name (recreate path) // would otherwise race with the in-progress teardown. State is dropped before this is called, so a @@ -98,8 +92,6 @@ type Adapter struct { // Optional: doUpdate *calladapt.BoundCaller doUpdateWithID *calladapt.BoundCaller - waitAfterCreate *calladapt.BoundCaller - waitAfterUpdate *calladapt.BoundCaller waitAfterDelete *calladapt.BoundCaller overrideChangeDesc *calladapt.BoundCaller doResize *calladapt.BoundCaller @@ -131,8 +123,6 @@ func NewAdapter(typedNil any, resourceType string, client *databricks.WorkspaceC doUpdate: nil, doUpdateWithID: nil, doResize: nil, - waitAfterCreate: nil, - waitAfterUpdate: nil, waitAfterDelete: nil, overrideChangeDesc: nil, resourceConfig: GetResourceConfig(resourceType), @@ -206,16 +196,6 @@ func (a *Adapter) initMethods(resource any) error { return err } - a.waitAfterCreate, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "WaitAfterCreate") - if err != nil { - return err - } - - a.waitAfterUpdate, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "WaitAfterUpdate") - if err != nil { - return err - } - a.waitAfterDelete, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "WaitAfterDelete") if err != nil { return err @@ -278,7 +258,7 @@ func (a *Adapter) validate() error { validations := []any{ "PrepareState return", a.prepareState.OutTypes[0], stateType, - "DoCreate newState", a.doCreate.InTypes[1], stateType, + "DoCreate newState", a.doCreate.InTypes[2], stateType, "DoDelete state", a.doDelete.InTypes[2], stateType, } @@ -301,7 +281,7 @@ func (a *Adapter) validate() error { // Validate DoUpdate: must return (remoteType, error) if implemented if a.doUpdate != nil { - validations = append(validations, "DoUpdate newState", a.doUpdate.InTypes[2], stateType) + validations = append(validations, "DoUpdate newState", a.doUpdate.InTypes[3], stateType) if len(a.doUpdate.OutTypes) != 2 { return fmt.Errorf("DoUpdate must return (remoteType, error), got %d return values", len(a.doUpdate.OutTypes)) } @@ -321,24 +301,6 @@ func (a *Adapter) validate() error { validations = append(validations, "DoUpdateWithID remoteState return", a.doUpdateWithID.OutTypes[1], remoteType) } - if a.waitAfterCreate != nil { - validations = append(validations, "WaitAfterCreate newState", a.waitAfterCreate.InTypes[2], stateType) - // WaitAfterCreate must return (remoteType, error) - if len(a.waitAfterCreate.OutTypes) != 2 { - return fmt.Errorf("WaitAfterCreate must return (remoteType, error), got %d return values", len(a.waitAfterCreate.OutTypes)) - } - validations = append(validations, "WaitAfterCreate remoteState return", a.waitAfterCreate.OutTypes[0], remoteType) - } - - if a.waitAfterUpdate != nil { - validations = append(validations, "WaitAfterUpdate newState", a.waitAfterUpdate.InTypes[2], stateType) - // WaitAfterUpdate must return (remoteType, error) - if len(a.waitAfterUpdate.OutTypes) != 2 { - return fmt.Errorf("WaitAfterUpdate must return (remoteType, error), got %d return values", len(a.waitAfterUpdate.OutTypes)) - } - validations = append(validations, "WaitAfterUpdate remoteState return", a.waitAfterUpdate.OutTypes[0], remoteType) - } - err = validateTypes(validations...) if err != nil { return err @@ -433,8 +395,8 @@ func normalizeNilPointer(v any) any { return v } -func (a *Adapter) DoCreate(ctx context.Context, newState any) (string, any, error) { - outs, err := a.doCreate.Call(ctx, newState) +func (a *Adapter) DoCreate(ctx context.Context, engine *Engine, newState any) (string, any, error) { + outs, err := a.doCreate.Call(ctx, engine, newState) if err != nil { return "", nil, err } @@ -451,12 +413,12 @@ func (a *Adapter) HasDoUpdate() bool { // DoUpdate updates the resource with the plan entry computed during plan. // Returns remote state if available, otherwise nil. -func (a *Adapter) DoUpdate(ctx context.Context, id string, newState any, entry *PlanEntry) (any, error) { +func (a *Adapter) DoUpdate(ctx context.Context, engine *Engine, id string, newState any, entry *PlanEntry) (any, error) { if a.doUpdate == nil { return nil, errors.New("internal error: DoUpdate not found") } - outs, err := a.doUpdate.Call(ctx, id, newState, entry) + outs, err := a.doUpdate.Call(ctx, engine, id, newState, entry) if err != nil { return nil, err } @@ -495,40 +457,6 @@ func (a *Adapter) DoResize(ctx context.Context, id string, newState any) error { return err } -// WaitAfterCreate waits for the resource to become ready after creation. -// If the resource doesn't implement this method, this is a no-op. -// Returns the updated remoteState if available, otherwise returns nil -func (a *Adapter) WaitAfterCreate(ctx context.Context, id string, newState any) (any, error) { - if a.waitAfterCreate == nil { - return nil, nil // no-op if not implemented - } - - outs, err := a.waitAfterCreate.Call(ctx, id, newState) - if err != nil { - return nil, err - } - - remoteState := normalizeNilPointer(outs[0]) - return remoteState, nil -} - -// WaitAfterUpdate waits for the resource to become ready after update. -// If the resource doesn't implement this method, this is a no-op. -// Returns the updated remoteState if available, otherwise returns nil. -func (a *Adapter) WaitAfterUpdate(ctx context.Context, id string, newState any) (any, error) { - if a.waitAfterUpdate == nil { - return nil, nil // no-op if not implemented - } - - outs, err := a.waitAfterUpdate.Call(ctx, id, newState) - if err != nil { - return nil, err - } - - remoteState := normalizeNilPointer(outs[0]) - return remoteState, nil -} - // WaitAfterDelete waits for the resource to be fully removed after DoDelete. // If the resource doesn't implement this method, this is a no-op. func (a *Adapter) WaitAfterDelete(ctx context.Context, id string) error { diff --git a/bundle/direct/dresources/alert.go b/bundle/direct/dresources/alert.go index a18641e810a..af71e378d41 100644 --- a/bundle/direct/dresources/alert.go +++ b/bundle/direct/dresources/alert.go @@ -37,7 +37,7 @@ func (r *ResourceAlert) DoRead(ctx context.Context, id string) (*sql.AlertV2, er } // DoCreate creates the alert and returns its id. -func (r *ResourceAlert) DoCreate(ctx context.Context, config *sql.AlertV2) (string, *sql.AlertV2, error) { +func (r *ResourceAlert) DoCreate(ctx context.Context, _ *Engine, config *sql.AlertV2) (string, *sql.AlertV2, error) { request := sql.CreateAlertV2Request{ Alert: *config, } @@ -49,7 +49,7 @@ func (r *ResourceAlert) DoCreate(ctx context.Context, config *sql.AlertV2) (stri } // DoUpdate updates the alert in place. -func (r *ResourceAlert) DoUpdate(ctx context.Context, id string, config *sql.AlertV2, _ *PlanEntry) (*sql.AlertV2, error) { +func (r *ResourceAlert) DoUpdate(ctx context.Context, _ *Engine, id string, config *sql.AlertV2, _ *PlanEntry) (*sql.AlertV2, error) { request := sql.UpdateAlertV2Request{ Id: id, Alert: *config, diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index f18a84d0efc..d1fe61d80c7 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -827,7 +827,8 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W require.Error(t, err) // TODO: if errors.Is(err, databricks.ErrResourceDoesNotExist) {... } - createdID, remoteStateFromCreate, err := adapter.DoCreate(ctx, newState) + nopEngine := NewNopEngine(adapter.StateType()) + createdID, remoteStateFromCreate, err := adapter.DoCreate(ctx, nopEngine, newState) require.NoError(t, err, "DoCreate failed state=%v", newState) require.NotEmpty(t, createdID, "ID returned from DoCreate was empty") @@ -849,14 +850,8 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W "unexpected differences between remappedState and remappedRemoteStateFromCreate") } - remoteStateFromWaitCreate, err := adapter.WaitAfterCreate(ctx, createdID, newState) - require.NoError(t, err) - if remoteStateFromWaitCreate != nil { - require.Equal(t, remote, remoteStateFromWaitCreate) - } - if adapter.HasDoUpdate() { - remoteStateFromUpdate, err := adapter.DoUpdate(ctx, createdID, newState, &deployplan.PlanEntry{}) + remoteStateFromUpdate, err := adapter.DoUpdate(ctx, nopEngine, createdID, newState, &deployplan.PlanEntry{}) require.NoError(t, err, "DoUpdate failed") if remoteStateFromUpdate != nil { remappedStateFromUpdate, err := adapter.RemapState(remoteStateFromUpdate) @@ -865,14 +860,6 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W "unexpected differences between remappedState and remappedStateFromUpdate") } - remoteStateFromWaitUpdate, err := adapter.WaitAfterUpdate(ctx, createdID, newState) - require.NoError(t, err) - if remoteStateFromWaitUpdate != nil { - remappedStateFromWaitUpdate, err := adapter.RemapState(remoteStateFromWaitUpdate) - require.NoError(t, err) - ignoreFilter.requireEqual(t, remappedState, remappedStateFromWaitUpdate, - "unexpected differences between remappedState and remappedStateFromWaitUpdate") - } } require.NoError(t, structwalk.Walk(newState, func(path *structpath.PathNode, val any, field *reflect.StructField) { diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index ed6752d834d..e36036d28b0 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -116,7 +116,7 @@ func (r *ResourceApp) DoRead(ctx context.Context, id string) (*AppRemote, error) return remote, nil } -func (r *ResourceApp) DoCreate(ctx context.Context, config *AppState) (string, *AppRemote, error) { +func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppState) (string, *AppRemote, error) { // Start app compute only when lifecycle.started=true is explicit. // For nil (omitted) or false, use no_compute=true (do not start compute). noCompute := config.Lifecycle == nil || config.Lifecycle.Started == nil || !*config.Lifecycle.Started @@ -154,7 +154,22 @@ func (r *ResourceApp) DoCreate(ctx context.Context, config *AppState) (string, * return "", nil, err } - return app.Name, nil, nil + // Save state as soon as the app exists so it is not orphaned if the wait or + // lifecycle management is interrupted. + engine.SetID(app.Name) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + + remote, err := r.waitForApp(ctx, r.client, config.Name) + if err != nil { + return "", nil, err + } + alreadyStarted := remote.Lifecycle != nil && remote.Lifecycle.Started != nil && *remote.Lifecycle.Started + if err := r.manageLifecycle(ctx, config.Name, config, alreadyStarted); err != nil { + return "", nil, err + } + return app.Name, remote, nil } var UpdateMaskFields = []string{ @@ -172,7 +187,7 @@ var UpdateMaskFields = []string{ var updateMask = strings.Join(UpdateMaskFields, ",") -func (r *ResourceApp) DoUpdate(ctx context.Context, id string, config *AppState, entry *PlanEntry) (*AppRemote, error) { +func (r *ResourceApp) DoUpdate(ctx context.Context, _ *Engine, id string, config *AppState, entry *PlanEntry) (*AppRemote, error) { // Deploy-only fields (source_code_path, config, // git_source, lifecycle) are not part of apps.App and thus excluded from the request body. if hasAppChanges(entry) { @@ -302,18 +317,6 @@ func (r *ResourceApp) DoDelete(ctx context.Context, id string, _ *AppState) erro return err } -func (r *ResourceApp) WaitAfterCreate(ctx context.Context, id string, config *AppState) (*AppRemote, error) { - remote, err := r.waitForApp(ctx, r.client, config.Name) - if err != nil { - return nil, err - } - alreadyStarted := remote.Lifecycle != nil && remote.Lifecycle.Started != nil && *remote.Lifecycle.Started - if err := r.manageLifecycle(ctx, config.Name, config, alreadyStarted); err != nil { - return nil, err - } - return remote, nil -} - // waitForApp waits for the app to reach the target state. The target state is either ACTIVE or STOPPED. // Apps with no_compute set to true will reach the STOPPED state, otherwise they will reach the ACTIVE state. // We can't use the default waiter from SDK because it only waits on ACTIVE state but we need also STOPPED state. diff --git a/bundle/direct/dresources/app_test.go b/bundle/direct/dresources/app_test.go index c7cbfacd1af..7d7ec940567 100644 --- a/bundle/direct/dresources/app_test.go +++ b/bundle/direct/dresources/app_test.go @@ -42,10 +42,18 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { getCallCount++ + if getCallCount == 1 { + return apps.App{ + Name: req.Vars["name"], + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateDeleting, + }, + } + } return apps.App{ Name: req.Vars["name"], ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateDeleting, + State: apps.ComputeStateActive, }, } }) @@ -60,12 +68,12 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { r := (&ResourceApp{}).New(client) ctx := t.Context() - name, _, err := r.DoCreate(ctx, &AppState{App: apps.App{Name: "test-app"}}) + name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) assert.Equal(t, 2, createCallCount, "expected Create to be called twice (1 retry)") - assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state") + assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } // TestAppDoCreate_RetriesWhenGetReturnsNotFound verifies that DoCreate retries @@ -97,11 +105,19 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { getCallCount++ - return testserver.Response{ - StatusCode: 404, - Body: map[string]string{ - "error_code": "RESOURCE_DOES_NOT_EXIST", - "message": "App not found.", + if getCallCount == 1 { + return testserver.Response{ + StatusCode: 404, + Body: map[string]string{ + "error_code": "RESOURCE_DOES_NOT_EXIST", + "message": "App not found.", + }, + } + } + return apps.App{ + Name: req.Vars["name"], + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, }, } }) @@ -116,12 +132,12 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { r := (&ResourceApp{}).New(client) ctx := t.Context() - name, _, err := r.DoCreate(ctx, &AppState{App: apps.App{Name: "test-app"}}) + name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) assert.Equal(t, 2, createCallCount, "expected Create to be called twice") - assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state") + assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } func TestAppDoUpdate_UpdateMaskHasAllFields(t *testing.T) { diff --git a/bundle/direct/dresources/catalog.go b/bundle/direct/dresources/catalog.go index 1ce28b54123..d23cda3538f 100644 --- a/bundle/direct/dresources/catalog.go +++ b/bundle/direct/dresources/catalog.go @@ -40,7 +40,7 @@ func (r *ResourceCatalog) DoRead(ctx context.Context, id string) (*catalog.Catal return r.client.Catalogs.GetByName(ctx, id) } -func (r *ResourceCatalog) DoCreate(ctx context.Context, config *catalog.CreateCatalog) (string, *catalog.CatalogInfo, error) { +func (r *ResourceCatalog) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateCatalog) (string, *catalog.CatalogInfo, error) { response, err := r.client.Catalogs.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -49,7 +49,7 @@ func (r *ResourceCatalog) DoCreate(ctx context.Context, config *catalog.CreateCa } // DoUpdate updates the catalog in place and returns remote state. -func (r *ResourceCatalog) DoUpdate(ctx context.Context, id string, config *catalog.CreateCatalog, _ *PlanEntry) (*catalog.CatalogInfo, error) { +func (r *ResourceCatalog) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateCatalog, _ *PlanEntry) (*catalog.CatalogInfo, error) { updateRequest := catalog.UpdateCatalog{ Comment: config.Comment, EnablePredictiveOptimization: "", // Not supported by DABs diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index b41de453017..c16cf341e70 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -147,12 +147,39 @@ func (r *ResourceCluster) DoRead(ctx context.Context, id string) (*ClusterRemote return remote, nil } -func (r *ResourceCluster) DoCreate(ctx context.Context, config *ClusterState) (string, *ClusterRemote, error) { +func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config *ClusterState) (string, *ClusterRemote, error) { wait, err := r.client.Clusters.Create(ctx, makeCreateCluster(&config.ClusterSpec)) if err != nil { return "", nil, err } - return wait.ClusterId, nil, nil + id := wait.ClusterId + + // Save state immediately after the cluster is created so it is not orphaned + // if the subsequent wait or terminate is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + + // Always wait for RUNNING first: clusters start in PENDING state and must be polled. + _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) + if err != nil { + return "", nil, err + } + + if config.Lifecycle != nil && config.Lifecycle.Started != nil && !*config.Lifecycle.Started { + // started=false: terminate the cluster after it reaches RUNNING. + deleteWaiter, err := r.client.Clusters.Delete(ctx, compute.DeleteCluster{ClusterId: id}) + if err != nil { + return "", nil, err + } + _, err = deleteWaiter.Get() + if err != nil { + return "", nil, err + } + } + + return id, nil, nil } // hasClusterChanges reports whether the plan entry contains any Update changes @@ -161,7 +188,7 @@ func hasClusterChanges(entry *PlanEntry) bool { return entry.Changes.HasChangeExcept("lifecycle", "lifecycle.started") } -func (r *ResourceCluster) DoUpdate(ctx context.Context, id string, config *ClusterState, entry *PlanEntry) (*ClusterRemote, error) { +func (r *ResourceCluster) DoUpdate(ctx context.Context, _ *Engine, id string, config *ClusterState, entry *PlanEntry) (*ClusterRemote, error) { if hasClusterChanges(entry) { // Same retry as in TF provider logic // https://github.com/databricks/terraform-provider-databricks/blob/3eecd0f90cf99d7777e79a3d03c41f9b2aafb004/clusters/resource_cluster.go#L624 @@ -191,50 +218,21 @@ func (r *ResourceCluster) DoUpdate(ctx context.Context, id string, config *Clust desiredStarted := *config.Lifecycle.Started alreadyRunning := remoteClusterIsRunning(entry) if desiredStarted && !alreadyRunning { - // lifecycle.started=true: fire Start; WaitAfterUpdate polls for RUNNING. + // lifecycle.started=true: fire Start and wait for RUNNING. _, err := r.client.Clusters.Start(ctx, compute.StartCluster{ClusterId: id}) + if err != nil { + return nil, err + } + _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) return nil, err } else if !desiredStarted && alreadyRunning { - // lifecycle.started=false: fire Delete; WaitAfterUpdate polls for TERMINATED. + // lifecycle.started=false: fire Delete and wait for TERMINATED. // Delete does not remove the cluster, it just sets the state to TERMINATED. _, err := r.client.Clusters.Delete(ctx, compute.DeleteCluster{ClusterId: id}) - return nil, err - } - - return nil, nil -} - -// WaitAfterUpdate waits for the cluster to reach the desired lifecycle state after DoUpdate. -func (r *ResourceCluster) WaitAfterUpdate(ctx context.Context, id string, config *ClusterState) (*ClusterRemote, error) { - if config.Lifecycle == nil || config.Lifecycle.Started == nil { - return nil, nil - } - - if *config.Lifecycle.Started { - _, err := r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) - return nil, err - } - - _, err := r.client.Clusters.WaitGetClusterTerminated(ctx, id, 15*time.Minute, nil) - return nil, err -} - -// WaitAfterCreate waits for the cluster to reach RUNNING state (clusters always start on creation). -// When lifecycle.started=false, it then terminates the cluster. -func (r *ResourceCluster) WaitAfterCreate(ctx context.Context, id string, config *ClusterState) (*ClusterRemote, error) { - // Always wait for RUNNING first: clusters start in PENDING state and must be polled. - _, err := r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) - if err != nil { - return nil, err - } - - if config.Lifecycle != nil && config.Lifecycle.Started != nil && !*config.Lifecycle.Started { - // started=false: terminate the cluster after it reaches RUNNING. - deleteWaiter, err := r.client.Clusters.Delete(ctx, compute.DeleteCluster{ClusterId: id}) if err != nil { return nil, err } - _, err = deleteWaiter.Get() + _, err = r.client.Clusters.WaitGetClusterTerminated(ctx, id, 15*time.Minute, nil) return nil, err } diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index dbf492a0bee..71d7b1f9218 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -264,7 +264,7 @@ func responseToState(createOrUpdateResp *dashboards.Dashboard, publishResp *dash } } -func (r *ResourceDashboard) DoCreate(ctx context.Context, config *DashboardState) (string, *DashboardState, error) { +func (r *ResourceDashboard) DoCreate(ctx context.Context, _ *Engine, config *DashboardState) (string, *DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return "", nil, err @@ -321,7 +321,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, config *DashboardState return createResp.DashboardId, responseToState(createResp, publishResp, dashboard.SerializedDashboard, config.Published), nil } -func (r *ResourceDashboard) DoUpdate(ctx context.Context, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { +func (r *ResourceDashboard) DoUpdate(ctx context.Context, _ *Engine, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return nil, err diff --git a/bundle/direct/dresources/database_catalog.go b/bundle/direct/dresources/database_catalog.go index 9bffa708d73..f5e18e8da27 100644 --- a/bundle/direct/dresources/database_catalog.go +++ b/bundle/direct/dresources/database_catalog.go @@ -24,7 +24,7 @@ func (r *ResourceDatabaseCatalog) DoRead(ctx context.Context, id string) (*datab return r.client.Database.GetDatabaseCatalogByName(ctx, id) } -func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, config *database.DatabaseCatalog) (string, *database.DatabaseCatalog, error) { +func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, _ *Engine, config *database.DatabaseCatalog) (string, *database.DatabaseCatalog, error) { result, err := r.client.Database.CreateDatabaseCatalog(ctx, database.CreateDatabaseCatalogRequest{ Catalog: *config, }) @@ -34,7 +34,7 @@ func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, config *database return result.Name, nil, nil } -func (r *ResourceDatabaseCatalog) DoUpdate(ctx context.Context, id string, config *database.DatabaseCatalog, _ *PlanEntry) (*database.DatabaseCatalog, error) { +func (r *ResourceDatabaseCatalog) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.DatabaseCatalog, _ *PlanEntry) (*database.DatabaseCatalog, error) { request := database.UpdateDatabaseCatalogRequest{ DatabaseCatalog: *config, Name: id, diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index 2169a61fc8e..35251eb526c 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -25,38 +25,42 @@ func (d *ResourceDatabaseInstance) DoRead(ctx context.Context, id string) (*data return d.client.Database.GetDatabaseInstanceByName(ctx, id) } -func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, config *database.DatabaseInstance) (string, *database.DatabaseInstance, error) { +func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, config *database.DatabaseInstance) (string, *database.DatabaseInstance, error) { waiter, err := d.client.Database.CreateDatabaseInstance(ctx, database.CreateDatabaseInstanceRequest{ DatabaseInstance: *config, }) if err != nil { return "", nil, err } - return waiter.Response.Name, nil, nil -} + id := waiter.Response.Name -func (d *ResourceDatabaseInstance) DoUpdate(ctx context.Context, id string, config *database.DatabaseInstance, _ *PlanEntry) (*database.DatabaseInstance, error) { - request := database.UpdateDatabaseInstanceRequest{ - DatabaseInstance: *config, - Name: config.Name, - UpdateMask: "*", + // Save state immediately after the instance is created so it is not orphaned + // if the subsequent wait is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err } - request.DatabaseInstance.Uid = id - _, err := d.client.Database.UpdateDatabaseInstance(ctx, request) - return nil, err -} -func (d *ResourceDatabaseInstance) WaitAfterCreate(ctx context.Context, id string, config *database.DatabaseInstance) (*database.DatabaseInstance, error) { - waiter := &database.WaitGetDatabaseInstanceDatabaseAvailable[database.DatabaseInstance]{ + waiterObj := &database.WaitGetDatabaseInstanceDatabaseAvailable[database.DatabaseInstance]{ Response: config, Name: config.Name, Poll: func(timeout time.Duration, callback func(*database.DatabaseInstance)) (*database.DatabaseInstance, error) { return d.client.Database.WaitGetDatabaseInstanceDatabaseAvailable(ctx, config.Name, timeout, callback) }, } - // _ is remoteState, should we return it here? - _, err := waiter.GetWithTimeout(20 * time.Minute) + _, err = waiterObj.GetWithTimeout(20 * time.Minute) + return id, nil, err +} + +func (d *ResourceDatabaseInstance) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.DatabaseInstance, _ *PlanEntry) (*database.DatabaseInstance, error) { + request := database.UpdateDatabaseInstanceRequest{ + DatabaseInstance: *config, + Name: config.Name, + UpdateMask: "*", + } + request.DatabaseInstance.Uid = id + _, err := d.client.Database.UpdateDatabaseInstance(ctx, request) return nil, err } diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go new file mode 100644 index 00000000000..c4c8be8e25f --- /dev/null +++ b/bundle/direct/dresources/engine.go @@ -0,0 +1,47 @@ +package dresources + +import ( + "errors" + "fmt" + "reflect" +) + +// Engine provides state persistence to resource implementations. +// Pass it to DoCreate or DoUpdate to save intermediate state before long-running +// wait operations, so the resource is not orphaned if deployment is interrupted. +type Engine struct { + id string + stateType reflect.Type + saveFunc func(id string, x any) error +} + +// NewEngine creates an Engine with the given state type and save function. +// The framework calls this before invoking DoCreate or DoUpdate. +func NewEngine(stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { + return &Engine{id: "", stateType: stateType, saveFunc: saveFunc} +} + +// NewNopEngine creates an Engine that discards all saves. Use in tests. +func NewNopEngine(stateType reflect.Type) *Engine { + return NewEngine(stateType, func(_ string, _ any) error { return nil }) +} + +// SetID sets the resource id for subsequent SaveState calls. +// Must be called before SaveState during DoCreate; for DoUpdate the Engine is +// pre-configured with the existing id. +func (e *Engine) SetID(id string) { + e.id = id +} + +// SaveState saves the resource state. x must be of the same pointer-to-struct +// type as the resource's state type. Returns an error if SetID was not called. +func (e *Engine) SaveState(x any) error { + if e.id == "" { + return errors.New("SaveState: id not set, call SetID first") + } + xt := reflect.TypeOf(x) + if xt != e.stateType { + return fmt.Errorf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt) + } + return e.saveFunc(e.id, x) +} diff --git a/bundle/direct/dresources/experiment.go b/bundle/direct/dresources/experiment.go index bedabc81365..71f807d4987 100644 --- a/bundle/direct/dresources/experiment.go +++ b/bundle/direct/dresources/experiment.go @@ -50,7 +50,7 @@ func (r *ResourceExperiment) DoRead(ctx context.Context, id string) (*ml.Experim return result.Experiment, nil } -func (r *ResourceExperiment) DoCreate(ctx context.Context, config *ml.CreateExperiment) (string, *ml.Experiment, error) { +func (r *ResourceExperiment) DoCreate(ctx context.Context, _ *Engine, config *ml.CreateExperiment) (string, *ml.Experiment, error) { result, err := r.client.Experiments.CreateExperiment(ctx, *config) if err != nil { return "", nil, err @@ -58,7 +58,7 @@ func (r *ResourceExperiment) DoCreate(ctx context.Context, config *ml.CreateExpe return result.ExperimentId, nil, nil } -func (r *ResourceExperiment) DoUpdate(ctx context.Context, id string, config *ml.CreateExperiment, _ *PlanEntry) (*ml.Experiment, error) { +func (r *ResourceExperiment) DoUpdate(ctx context.Context, _ *Engine, id string, config *ml.CreateExperiment, _ *PlanEntry) (*ml.Experiment, error) { updateReq := ml.UpdateExperiment{ ExperimentId: id, NewName: config.Name, diff --git a/bundle/direct/dresources/external_location.go b/bundle/direct/dresources/external_location.go index 64eace48eb3..be29bed5830 100644 --- a/bundle/direct/dresources/external_location.go +++ b/bundle/direct/dresources/external_location.go @@ -44,7 +44,7 @@ func (r *ResourceExternalLocation) DoRead(ctx context.Context, id string) (*cata return r.client.ExternalLocations.GetByName(ctx, id) } -func (r *ResourceExternalLocation) DoCreate(ctx context.Context, config *catalog.CreateExternalLocation) (string, *catalog.ExternalLocationInfo, error) { +func (r *ResourceExternalLocation) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateExternalLocation) (string, *catalog.ExternalLocationInfo, error) { response, err := r.client.ExternalLocations.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -53,7 +53,7 @@ func (r *ResourceExternalLocation) DoCreate(ctx context.Context, config *catalog } // DoUpdate updates the external location in place and returns remote state. -func (r *ResourceExternalLocation) DoUpdate(ctx context.Context, id string, config *catalog.CreateExternalLocation, _ *PlanEntry) (*catalog.ExternalLocationInfo, error) { +func (r *ResourceExternalLocation) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateExternalLocation, _ *PlanEntry) (*catalog.ExternalLocationInfo, error) { updateRequest := catalog.UpdateExternalLocation{ Comment: config.Comment, CredentialName: config.CredentialName, diff --git a/bundle/direct/dresources/grants.go b/bundle/direct/dresources/grants.go index 596346f1614..f03df836698 100644 --- a/bundle/direct/dresources/grants.go +++ b/bundle/direct/dresources/grants.go @@ -107,8 +107,8 @@ func (r *ResourceGrants) DoRead(ctx context.Context, id string) (*GrantsState, e }, nil } -func (r *ResourceGrants) DoCreate(ctx context.Context, state *GrantsState) (string, *GrantsState, error) { - _, err := r.DoUpdate(ctx, "", state, nil) +func (r *ResourceGrants) DoCreate(ctx context.Context, engine *Engine, state *GrantsState) (string, *GrantsState, error) { + _, err := r.DoUpdate(ctx, engine, "", state, nil) if err != nil { // Grants Update is idempotent (additive PATCH), so retrying on transient errors is safe. return "", nil, retrySafe(err) @@ -117,7 +117,7 @@ func (r *ResourceGrants) DoCreate(ctx context.Context, state *GrantsState) (stri return state.SecurableType + "/" + state.FullName, nil, nil } -func (r *ResourceGrants) DoUpdate(ctx context.Context, _ string, state *GrantsState, entry *PlanEntry) (*GrantsState, error) { +func (r *ResourceGrants) DoUpdate(ctx context.Context, _ *Engine, _ string, state *GrantsState, entry *PlanEntry) (*GrantsState, error) { if state.FullName == "" { return nil, errors.New("internal error: grants full_name must be resolved before deployment") } diff --git a/bundle/direct/dresources/job.go b/bundle/direct/dresources/job.go index 49ca54036c4..efc013904b2 100644 --- a/bundle/direct/dresources/job.go +++ b/bundle/direct/dresources/job.go @@ -122,7 +122,7 @@ func makeJobRemote(job *jobs.Job) *JobRemote { } } -func (r *ResourceJob) DoCreate(ctx context.Context, config *jobs.JobSettings) (string, *JobRemote, error) { +func (r *ResourceJob) DoCreate(ctx context.Context, _ *Engine, config *jobs.JobSettings) (string, *JobRemote, error) { request, err := makeCreateJob(*config) if err != nil { return "", nil, err @@ -134,7 +134,7 @@ func (r *ResourceJob) DoCreate(ctx context.Context, config *jobs.JobSettings) (s return strconv.FormatInt(response.JobId, 10), nil, nil } -func (r *ResourceJob) DoUpdate(ctx context.Context, id string, config *jobs.JobSettings, _ *PlanEntry) (*JobRemote, error) { +func (r *ResourceJob) DoUpdate(ctx context.Context, _ *Engine, id string, config *jobs.JobSettings, _ *PlanEntry) (*JobRemote, error) { request, err := makeResetJob(*config, id) if err != nil { return nil, err diff --git a/bundle/direct/dresources/model.go b/bundle/direct/dresources/model.go index 9d04231456d..8b723f30603 100644 --- a/bundle/direct/dresources/model.go +++ b/bundle/direct/dresources/model.go @@ -52,7 +52,7 @@ func (r *ResourceMlflowModel) DoRead(ctx context.Context, id string) (*MlflowMod }, nil } -func (r *ResourceMlflowModel) DoCreate(ctx context.Context, config *ml.CreateModelRequest) (string, *MlflowModelRemote, error) { +func (r *ResourceMlflowModel) DoCreate(ctx context.Context, _ *Engine, config *ml.CreateModelRequest) (string, *MlflowModelRemote, error) { response, err := r.client.ModelRegistry.CreateModel(ctx, *config) if err != nil { return "", nil, err @@ -62,7 +62,7 @@ func (r *ResourceMlflowModel) DoCreate(ctx context.Context, config *ml.CreateMod return response.RegisteredModel.Name, nil, nil } -func (r *ResourceMlflowModel) DoUpdate(ctx context.Context, id string, config *ml.CreateModelRequest, entry *PlanEntry) (*MlflowModelRemote, error) { +func (r *ResourceMlflowModel) DoUpdate(ctx context.Context, _ *Engine, id string, config *ml.CreateModelRequest, entry *PlanEntry) (*MlflowModelRemote, error) { updateRequest := ml.UpdateModelRequest{ Name: id, Description: config.Description, diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index ccab3a13dea..3ba258d1090 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -128,13 +128,22 @@ func (r *ResourceModelServingEndpoint) DoRead(ctx context.Context, id string) (* }, nil } -func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, config *serving.CreateServingEndpoint) (string, *ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Engine, config *serving.CreateServingEndpoint) (string, *ModelServingEndpointRemote, error) { waiter, err := r.client.ServingEndpoints.Create(ctx, *config) if err != nil { return "", nil, err } + id := waiter.Response.Name - return waiter.Response.Name, nil, nil + // Save state immediately after the endpoint is created so it is not orphaned + // if the subsequent wait is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + + remote, err := r.waitForEndpointReady(ctx, config.Name) + return id, remote, err } // waitForEndpointReady waits for the serving endpoint to be ready (not updating) @@ -150,14 +159,6 @@ func (r *ResourceModelServingEndpoint) waitForEndpointReady(ctx context.Context, }, nil } -func (r *ResourceModelServingEndpoint) WaitAfterCreate(ctx context.Context, id string, config *serving.CreateServingEndpoint) (*ModelServingEndpointRemote, error) { - return r.waitForEndpointReady(ctx, config.Name) -} - -func (r *ResourceModelServingEndpoint) WaitAfterUpdate(ctx context.Context, id string, config *serving.CreateServingEndpoint) (*ModelServingEndpointRemote, error) { - return r.waitForEndpointReady(ctx, config.Name) -} - func (r *ResourceModelServingEndpoint) updateAiGateway(ctx context.Context, id string, aiGateway *serving.AiGatewayConfig) error { if aiGateway == nil { req := serving.PutAiGatewayRequest{ @@ -290,7 +291,7 @@ func (r *ResourceModelServingEndpoint) updateTags(ctx context.Context, id string return nil } -func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { var err error // Terraform makes these API calls sequentially. We do the same here. @@ -324,7 +325,7 @@ func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, id string, } } - return nil, nil + return r.waitForEndpointReady(ctx, config.Name) } func (r *ResourceModelServingEndpoint) DoDelete(ctx context.Context, id string, _ *serving.CreateServingEndpoint) error { diff --git a/bundle/direct/dresources/permissions.go b/bundle/direct/dresources/permissions.go index 91ca9000aaf..cca6f3a445a 100644 --- a/bundle/direct/dresources/permissions.go +++ b/bundle/direct/dresources/permissions.go @@ -215,9 +215,9 @@ func (r *ResourcePermissions) DoRead(ctx context.Context, id string) (*Permissio } // DoCreate calls https://docs.databricks.com/api/workspace/jobs/setjobpermissions. -func (r *ResourcePermissions) DoCreate(ctx context.Context, newState *PermissionsState) (string, *PermissionsState, error) { +func (r *ResourcePermissions) DoCreate(ctx context.Context, engine *Engine, newState *PermissionsState) (string, *PermissionsState, error) { // should we remember the default here? - _, err := r.DoUpdate(ctx, newState.ObjectID, newState, nil) + _, err := r.DoUpdate(ctx, engine, newState.ObjectID, newState, nil) if err != nil { // Permissions Set is idempotent (PUT), so retrying on transient errors is safe. return "", nil, retrySafe(err) @@ -227,7 +227,7 @@ func (r *ResourcePermissions) DoCreate(ctx context.Context, newState *Permission } // DoUpdate calls https://docs.databricks.com/api/workspace/jobs/setjobpermissions. -func (r *ResourcePermissions) DoUpdate(ctx context.Context, _ string, newState *PermissionsState, _ *PlanEntry) (*PermissionsState, error) { +func (r *ResourcePermissions) DoUpdate(ctx context.Context, _ *Engine, _ string, newState *PermissionsState, _ *PlanEntry) (*PermissionsState, error) { extractedType, extractedID, err := parsePermissionsID(newState.ObjectID) if err != nil { return nil, err diff --git a/bundle/direct/dresources/pipeline.go b/bundle/direct/dresources/pipeline.go index 0861f0fad2f..e2e6aa12807 100644 --- a/bundle/direct/dresources/pipeline.go +++ b/bundle/direct/dresources/pipeline.go @@ -123,7 +123,7 @@ func makePipelineRemote(p *pipelines.GetPipelineResponse) *PipelineRemote { } } -func (r *ResourcePipeline) DoCreate(ctx context.Context, config *pipelines.CreatePipeline) (string, *PipelineRemote, error) { +func (r *ResourcePipeline) DoCreate(ctx context.Context, _ *Engine, config *pipelines.CreatePipeline) (string, *PipelineRemote, error) { response, err := r.client.Pipelines.Create(ctx, *config) if err != nil { return "", nil, err @@ -131,7 +131,7 @@ func (r *ResourcePipeline) DoCreate(ctx context.Context, config *pipelines.Creat return response.PipelineId, nil, nil } -func (r *ResourcePipeline) DoUpdate(ctx context.Context, id string, config *pipelines.CreatePipeline, _ *PlanEntry) (*PipelineRemote, error) { +func (r *ResourcePipeline) DoUpdate(ctx context.Context, _ *Engine, id string, config *pipelines.CreatePipeline, _ *PlanEntry) (*PipelineRemote, error) { request := pipelines.EditPipeline{ AllowDuplicateNames: config.AllowDuplicateNames, BudgetPolicyId: config.BudgetPolicyId, diff --git a/bundle/direct/dresources/postgres_branch.go b/bundle/direct/dresources/postgres_branch.go index 11c0e7a0fe0..d53ac29dade 100644 --- a/bundle/direct/dresources/postgres_branch.go +++ b/bundle/direct/dresources/postgres_branch.go @@ -102,7 +102,7 @@ func (r *ResourcePostgresBranch) DoRead(ctx context.Context, id string) (*Postgr return makePostgresBranchRemote(branch), nil } -func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { waiter, err := r.client.Postgres.CreateBranch(ctx, postgres.CreateBranchRequest{ BranchId: config.BranchId, Parent: config.Parent, @@ -135,7 +135,7 @@ func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, config *PostgresB return remote.Name, remote, nil } -func (r *ResourcePostgresBranch) DoUpdate(ctx context.Context, id string, config *PostgresBranchState, entry *PlanEntry) (*PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresBranchState, entry *PlanEntry) (*PostgresBranchRemote, error) { // Build update mask from fields that have action="update" in the changes map. // This excludes immutable fields and fields that haven't changed. // Prefix with "spec." because the API expects paths relative to the Branch object, diff --git a/bundle/direct/dresources/postgres_catalog.go b/bundle/direct/dresources/postgres_catalog.go index 35a1c0f0cc6..f8d3f51f6d3 100644 --- a/bundle/direct/dresources/postgres_catalog.go +++ b/bundle/direct/dresources/postgres_catalog.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresCatalog) DoRead(ctx context.Context, id string) (*Postg return makePostgresCatalogRemote(catalog), nil } -func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { +func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { waiter, err := r.client.Postgres.CreateCatalog(ctx, postgres.CreateCatalogRequest{ CatalogId: config.CatalogId, Catalog: postgres.Catalog{ diff --git a/bundle/direct/dresources/postgres_endpoint.go b/bundle/direct/dresources/postgres_endpoint.go index 04dd6874f2c..91e43df1028 100644 --- a/bundle/direct/dresources/postgres_endpoint.go +++ b/bundle/direct/dresources/postgres_endpoint.go @@ -141,7 +141,7 @@ func (r *ResourcePostgresEndpoint) waitForReconciliation(ctx context.Context, na } } -func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { waiter, err := r.client.Postgres.CreateEndpoint(ctx, postgres.CreateEndpointRequest{ EndpointId: config.EndpointId, Parent: config.Parent, @@ -179,7 +179,7 @@ func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, config *Postgre return remote.Name, remote, nil } -func (r *ResourcePostgresEndpoint) DoUpdate(ctx context.Context, id string, config *PostgresEndpointState, entry *PlanEntry) (*PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresEndpointState, entry *PlanEntry) (*PostgresEndpointRemote, error) { // Build update mask from fields that have action="update" in the changes map. // This excludes immutable fields and fields that haven't changed. // Prefix with "spec." because the API expects paths relative to the Endpoint object, diff --git a/bundle/direct/dresources/postgres_project.go b/bundle/direct/dresources/postgres_project.go index fc2ef631e38..35f223a534d 100644 --- a/bundle/direct/dresources/postgres_project.go +++ b/bundle/direct/dresources/postgres_project.go @@ -106,7 +106,7 @@ func (r *ResourcePostgresProject) DoRead(ctx context.Context, id string) (*Postg return makePostgresProjectRemote(project), nil } -func (r *ResourcePostgresProject) DoCreate(ctx context.Context, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { waiter, err := r.client.Postgres.CreateProject(ctx, postgres.CreateProjectRequest{ ProjectId: config.ProjectId, Project: postgres.Project{ @@ -138,7 +138,7 @@ func (r *ResourcePostgresProject) DoCreate(ctx context.Context, config *Postgres return remote.Name, remote, nil } -func (r *ResourcePostgresProject) DoUpdate(ctx context.Context, id string, config *PostgresProjectState, entry *PlanEntry) (*PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresProjectState, entry *PlanEntry) (*PostgresProjectRemote, error) { // Build the mask from the plan's change list and prefix with "spec." (the // API expects paths relative to Project). The API rejects mask entries // that aren't also populated in the request body, and a wildcard "*" diff --git a/bundle/direct/dresources/postgres_synced_table.go b/bundle/direct/dresources/postgres_synced_table.go index 623605faf43..22f3c4d89c1 100644 --- a/bundle/direct/dresources/postgres_synced_table.go +++ b/bundle/direct/dresources/postgres_synced_table.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresSyncedTable) DoRead(ctx context.Context, id string) (*P return makePostgresSyncedTableRemote(syncedTable), nil } -func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { +func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { waiter, err := r.client.Postgres.CreateSyncedTable(ctx, postgres.CreateSyncedTableRequest{ SyncedTableId: config.SyncedTableId, SyncedTable: postgres.SyncedTable{ diff --git a/bundle/direct/dresources/quality_monitor.go b/bundle/direct/dresources/quality_monitor.go index c66fed4e0bb..7a0473ab543 100644 --- a/bundle/direct/dresources/quality_monitor.go +++ b/bundle/direct/dresources/quality_monitor.go @@ -72,7 +72,7 @@ func (r *ResourceQualityMonitor) DoRead(ctx context.Context, id string) (*catalo }) } -func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, config *QualityMonitorState) (string, *catalog.MonitorInfo, error) { +func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, _ *Engine, config *QualityMonitorState) (string, *catalog.MonitorInfo, error) { req := config.CreateMonitor req.TableName = config.TableName //nolint:staticcheck // Direct quality_monitor resource still uses legacy monitor endpoints; v1 data-quality migration is separate work. @@ -83,7 +83,7 @@ func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, config *QualityMo return response.TableName, response, nil } -func (r *ResourceQualityMonitor) DoUpdate(ctx context.Context, id string, config *QualityMonitorState, _ *PlanEntry) (*catalog.MonitorInfo, error) { +func (r *ResourceQualityMonitor) DoUpdate(ctx context.Context, _ *Engine, id string, config *QualityMonitorState, _ *PlanEntry) (*catalog.MonitorInfo, error) { updateRequest := catalog.UpdateMonitor{ TableName: id, BaselineTableName: config.BaselineTableName, diff --git a/bundle/direct/dresources/registered_model.go b/bundle/direct/dresources/registered_model.go index 888870a7581..d1438a25bf5 100644 --- a/bundle/direct/dresources/registered_model.go +++ b/bundle/direct/dresources/registered_model.go @@ -55,7 +55,7 @@ func (r *ResourceRegisteredModel) DoRead(ctx context.Context, id string) (*catal }) } -func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, config *catalog.CreateRegisteredModelRequest) (string, *catalog.RegisteredModelInfo, error) { +func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateRegisteredModelRequest) (string, *catalog.RegisteredModelInfo, error) { response, err := r.client.RegisteredModels.Create(ctx, *config) if err != nil { return "", nil, err @@ -64,7 +64,7 @@ func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, config *catalog. return response.FullName, response, nil } -func (r *ResourceRegisteredModel) DoUpdate(ctx context.Context, id string, config *catalog.CreateRegisteredModelRequest, _ *PlanEntry) (*catalog.RegisteredModelInfo, error) { +func (r *ResourceRegisteredModel) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateRegisteredModelRequest, _ *PlanEntry) (*catalog.RegisteredModelInfo, error) { updateRequest := catalog.UpdateRegisteredModelRequest{ FullName: id, Comment: config.Comment, diff --git a/bundle/direct/dresources/schema.go b/bundle/direct/dresources/schema.go index f082ea6c547..5895961fab7 100644 --- a/bundle/direct/dresources/schema.go +++ b/bundle/direct/dresources/schema.go @@ -37,7 +37,7 @@ func (r *ResourceSchema) DoRead(ctx context.Context, id string) (*catalog.Schema return r.client.Schemas.GetByFullName(ctx, id) } -func (r *ResourceSchema) DoCreate(ctx context.Context, config *catalog.CreateSchema) (string, *catalog.SchemaInfo, error) { +func (r *ResourceSchema) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateSchema) (string, *catalog.SchemaInfo, error) { response, err := r.client.Schemas.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -46,7 +46,7 @@ func (r *ResourceSchema) DoCreate(ctx context.Context, config *catalog.CreateSch } // DoUpdate updates the schema in place and returns remote state. -func (r *ResourceSchema) DoUpdate(ctx context.Context, id string, config *catalog.CreateSchema, _ *PlanEntry) (*catalog.SchemaInfo, error) { +func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateSchema, _ *PlanEntry) (*catalog.SchemaInfo, error) { updateRequest := catalog.UpdateSchema{ Comment: config.Comment, EnablePredictiveOptimization: "", // Not supported by DABs diff --git a/bundle/direct/dresources/schema_test.go b/bundle/direct/dresources/schema_test.go index d013610e052..e3a66bf4cc6 100644 --- a/bundle/direct/dresources/schema_test.go +++ b/bundle/direct/dresources/schema_test.go @@ -2,6 +2,7 @@ package dresources import ( "encoding/json" + "reflect" "testing" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -24,7 +25,8 @@ func TestResourceSchema_DoUpdate_WithUnsupportedForceSendFields(t *testing.T) { ForceSendFields: nil, } - id, _, err := adapter.DoCreate(ctx, config) + nopEngine := NewNopEngine(reflect.TypeOf(config)) + id, _, err := adapter.DoCreate(ctx, nopEngine, config) require.NoError(t, err) config.Comment = "updated comment" @@ -37,7 +39,7 @@ func TestResourceSchema_DoUpdate_WithUnsupportedForceSendFields(t *testing.T) { "Owner", // Unsupported - should be filtered out } - _, err = adapter.DoUpdate(ctx, id, config, &PlanEntry{}) + _, err = adapter.DoUpdate(ctx, nopEngine, id, config, &PlanEntry{}) require.NoError(t, err) result, err := adapter.DoRead(ctx, id) diff --git a/bundle/direct/dresources/secret_scope.go b/bundle/direct/dresources/secret_scope.go index c811dc84d77..a77fb1ebd3c 100644 --- a/bundle/direct/dresources/secret_scope.go +++ b/bundle/direct/dresources/secret_scope.go @@ -66,7 +66,7 @@ func (r *ResourceSecretScope) DoRead(ctx context.Context, id string) (*workspace return nil, fmt.Errorf("secret scope %q not found", id) } -func (r *ResourceSecretScope) DoCreate(ctx context.Context, state *SecretScopeConfig) (string, *workspace.SecretScope, error) { +func (r *ResourceSecretScope) DoCreate(ctx context.Context, _ *Engine, state *SecretScopeConfig) (string, *workspace.SecretScope, error) { err := r.client.Secrets.CreateScope(ctx, state.CreateScope) if err != nil { return "", nil, err diff --git a/bundle/direct/dresources/secret_scope_acls.go b/bundle/direct/dresources/secret_scope_acls.go index ef04cb7cb6a..14cd05b8f91 100644 --- a/bundle/direct/dresources/secret_scope_acls.go +++ b/bundle/direct/dresources/secret_scope_acls.go @@ -92,7 +92,7 @@ func (r *ResourceSecretScopeAcls) RemapState(remote *SecretScopeAclsState) *Secr return remote } -func (r *ResourceSecretScopeAcls) DoCreate(ctx context.Context, state *SecretScopeAclsState) (string, *SecretScopeAclsState, error) { +func (r *ResourceSecretScopeAcls) DoCreate(ctx context.Context, _ *Engine, state *SecretScopeAclsState) (string, *SecretScopeAclsState, error) { err := r.setACLs(ctx, state.ScopeName, state.Acls) if err != nil { return "", nil, err @@ -109,7 +109,7 @@ func (r *ResourceSecretScopeAcls) DoUpdateWithID(ctx context.Context, id string, return state.ScopeName, nil, nil } -func (r *ResourceSecretScopeAcls) DoUpdate(ctx context.Context, id string, state *SecretScopeAclsState, _ *PlanEntry) (*SecretScopeAclsState, error) { +func (r *ResourceSecretScopeAcls) DoUpdate(ctx context.Context, _ *Engine, id string, state *SecretScopeAclsState, _ *PlanEntry) (*SecretScopeAclsState, error) { _, _, err := r.DoUpdateWithID(ctx, id, state) return nil, err } diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index 146dee5294d..5dbd9d1d621 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -118,7 +118,7 @@ func (r *ResourceSqlWarehouse) DoRead(ctx context.Context, id string) (*SqlWareh } // DoCreate creates the warehouse and returns its id. -func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, _ *Engine, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { waiter, err := r.client.Warehouses.Create(ctx, config.CreateWarehouseRequest) if err != nil { return "", nil, err @@ -133,7 +133,7 @@ func hasWarehouseChanges(entry *PlanEntry) bool { } // DoUpdate updates the warehouse in place. -func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, id string, config *SqlWarehouseState, entry *PlanEntry) (*SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, _ *Engine, id string, config *SqlWarehouseState, entry *PlanEntry) (*SqlWarehouseRemote, error) { if hasWarehouseChanges(entry) { request := sql.EditWarehouseRequest{ AutoStopMins: config.AutoStopMins, diff --git a/bundle/direct/dresources/synced_database_table.go b/bundle/direct/dresources/synced_database_table.go index d45c6fb3fc7..7041329ca7d 100644 --- a/bundle/direct/dresources/synced_database_table.go +++ b/bundle/direct/dresources/synced_database_table.go @@ -24,7 +24,7 @@ func (r *ResourceSyncedDatabaseTable) DoRead(ctx context.Context, name string) ( return r.client.Database.GetSyncedDatabaseTableByName(ctx, name) } -func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, config *database.SyncedDatabaseTable) (string, *database.SyncedDatabaseTable, error) { +func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, _ *Engine, config *database.SyncedDatabaseTable) (string, *database.SyncedDatabaseTable, error) { result, err := r.client.Database.CreateSyncedDatabaseTable(ctx, database.CreateSyncedDatabaseTableRequest{ SyncedTable: *config, }) @@ -34,7 +34,7 @@ func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, config *data return result.Name, nil, nil } -func (r *ResourceSyncedDatabaseTable) DoUpdate(ctx context.Context, id string, config *database.SyncedDatabaseTable, _ *PlanEntry) (*database.SyncedDatabaseTable, error) { +func (r *ResourceSyncedDatabaseTable) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.SyncedDatabaseTable, _ *PlanEntry) (*database.SyncedDatabaseTable, error) { request := database.UpdateSyncedDatabaseTableRequest{ SyncedTable: *config, Name: id, diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 1178def16be..70f06891a4d 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -65,24 +65,28 @@ func (r *ResourceVectorSearchEndpoint) DoRead(ctx context.Context, id string) (* return newVectorSearchEndpointRemote(info), nil } -func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, config *vectorsearch.CreateEndpoint) (string, *VectorSearchEndpointRemote, error) { - waiter, err := r.client.VectorSearchEndpoints.CreateEndpoint(ctx, *config) +func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Engine, config *vectorsearch.CreateEndpoint) (string, *VectorSearchEndpointRemote, error) { + _, err := r.client.VectorSearchEndpoints.CreateEndpoint(ctx, *config) if err != nil { return "", nil, err } id := config.Name - return id, newVectorSearchEndpointRemote(waiter.Response), nil -} -func (r *ResourceVectorSearchEndpoint) WaitAfterCreate(ctx context.Context, id string, config *vectorsearch.CreateEndpoint) (*VectorSearchEndpointRemote, error) { + // Save state immediately after the endpoint is created so it is not orphaned + // if the subsequent wait is interrupted. + engine.SetID(id) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } + info, err := r.client.VectorSearchEndpoints.WaitGetEndpointVectorSearchEndpointOnline(ctx, config.Name, 60*time.Minute, nil) if err != nil { - return nil, err + return "", nil, err } - return newVectorSearchEndpointRemote(info), nil + return id, newVectorSearchEndpointRemote(info), nil } -func (r *ResourceVectorSearchEndpoint) DoUpdate(ctx context.Context, id string, config *vectorsearch.CreateEndpoint, entry *PlanEntry) (*VectorSearchEndpointRemote, error) { +func (r *ResourceVectorSearchEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *vectorsearch.CreateEndpoint, entry *PlanEntry) (*VectorSearchEndpointRemote, error) { if entry.Changes.HasChange(pathBudgetPolicyId) { _, err := r.client.VectorSearchEndpoints.UpdateEndpointBudgetPolicy(ctx, vectorsearch.PatchEndpointBudgetPolicyRequest{ EndpointName: id, diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index 48ee6f0f968..0bba784a90a 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -121,8 +121,8 @@ func (r *ResourceVectorSearchIndex) DoRead(ctx context.Context, id string) (*Vec }, nil } -func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, config *VectorSearchIndexState) (string, *VectorSearchIndexRemote, error) { - index, err := r.client.VectorSearchIndexes.CreateIndex(ctx, config.CreateVectorIndexRequest) +func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine, config *VectorSearchIndexState) (string, *VectorSearchIndexRemote, error) { + _, err := r.client.VectorSearchIndexes.CreateIndex(ctx, config.CreateVectorIndexRequest) if err != nil { return "", nil, err } @@ -135,26 +135,18 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, config *Vector return "", nil, err } config.EndpointUuid = endpointUuid - return config.Name, &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil -} -// No DoUpdate: vector search indexes have no update API. All SDK fields are -// declared in resources.yml under recreate_on_changes or ignore_remote_changes. -// If a future SDK bump adds a new field that isn't classified, the framework -// rejects the resulting Update plan at bundle_plan.go (see also the reflection -// test in vector_search_index_test.go which catches it earlier at unit-test time). - -func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { - return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) -} + // Save state immediately after the index is created (endpoint UUID now set) so it + // is not orphaned if the subsequent provisioning wait is interrupted. + engine.SetID(config.Name) + if err := engine.SaveState(config); err != nil { + return "", nil, err + } -// WaitAfterCreate polls GetIndex until Status.Ready=true. CreateIndex returns -// immediately with metadata of an index whose embedding pipeline is still -// provisioning; queries against an index that isn't ready fail. Blocking here -// lets dependent resources (and the next plan) see a usable index. -func (r *ResourceVectorSearchIndex) WaitAfterCreate(ctx context.Context, id string, config *VectorSearchIndexState) (*VectorSearchIndexRemote, error) { + // CreateIndex returns immediately; poll until the embedding pipeline is ready so + // dependent resources and the next plan see a usable index. index, err := retries.Poll(ctx, createIndexTimeout, func() (*vectorsearch.VectorIndex, *retries.Err) { - idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, id) + idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, config.Name) if getErr != nil { return nil, retries.Halt(getErr) } @@ -168,9 +160,19 @@ func (r *ResourceVectorSearchIndex) WaitAfterCreate(ctx context.Context, id stri return idx, nil }) if err != nil { - return nil, err + return "", nil, err } - return &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: config.EndpointUuid}, nil + return config.Name, &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil +} + +// No DoUpdate: vector search indexes have no update API. All SDK fields are +// declared in resources.yml under recreate_on_changes or ignore_remote_changes. +// If a future SDK bump adds a new field that isn't classified, the framework +// rejects the resulting Update plan at bundle_plan.go (see also the reflection +// test in vector_search_index_test.go which catches it earlier at unit-test time). + +func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { + return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) } // WaitAfterDelete polls GetIndex until it returns 404. The DELETE call is diff --git a/bundle/direct/dresources/volume.go b/bundle/direct/dresources/volume.go index 73bf7a79b40..3291226b12d 100644 --- a/bundle/direct/dresources/volume.go +++ b/bundle/direct/dresources/volume.go @@ -42,7 +42,7 @@ func (r *ResourceVolume) DoRead(ctx context.Context, id string) (*catalog.Volume return r.client.Volumes.ReadByName(ctx, id) } -func (r *ResourceVolume) DoCreate(ctx context.Context, config *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) { +func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) { response, err := r.client.Volumes.Create(ctx, *config) if err != nil { return "", nil, err @@ -50,7 +50,7 @@ func (r *ResourceVolume) DoCreate(ctx context.Context, config *catalog.CreateVol return response.FullName, response, nil } -func (r *ResourceVolume) DoUpdate(ctx context.Context, id string, config *catalog.CreateVolumeRequestContent, _ *PlanEntry) (*catalog.VolumeInfo, error) { +func (r *ResourceVolume) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateVolumeRequestContent, _ *PlanEntry) (*catalog.VolumeInfo, error) { updateRequest := catalog.UpdateVolumeRequestContent{ Comment: config.Comment, Name: id, From 2898f5506e95bb960a040bab5b29695654f7f2c9 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 29 May 2026 15:03:06 +0200 Subject: [PATCH 02/21] direct/testserver: improve app test realism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make testserver app DELETE asynchronous (sets DELETING state) to match real API behaviour. AppsGet auto-removes the app after returning it in DELETING state, so the next request sees it as gone. app_test.go: use real SDK calls (Create + DeleteByName) to put the app in DELETING state before testing DoCreate retry logic, instead of injecting state directly. RetriesWhenGetReturnsNotFound uses a one-shot POST override so GET returns 404 naturally without a custom GET handler. all_test.go: retry DoRead once after DoDelete to let async deletions (DELETING → gone) clear before asserting the resource is absent. Add Server.GetWorkspace to testserver for pre-seeding state in tests. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/all_test.go | 5 ++ bundle/direct/dresources/app_test.go | 99 ++++++---------------------- libs/testserver/apps.go | 36 ++++++++++ libs/testserver/handlers.go | 4 +- libs/testserver/server.go | 6 ++ 5 files changed, 68 insertions(+), 82 deletions(-) diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index d1fe61d80c7..fb33d12cb9d 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -904,6 +904,11 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W deleteIsNoop := strings.HasSuffix(group, "permissions") || strings.HasSuffix(group, "grants") remoteAfterDelete, err := adapter.DoRead(ctx, createdID) + // Some resources delete asynchronously (e.g. apps transition through a + // DELETING state). Read once more to let that pending state clear. + if err == nil && remoteAfterDelete != nil && !deleteIsNoop { + remoteAfterDelete, err = adapter.DoRead(ctx, createdID) + } if deleteIsNoop { require.NoError(t, err) } else { diff --git a/bundle/direct/dresources/app_test.go b/bundle/direct/dresources/app_test.go index 7d7ec940567..19f2a35a332 100644 --- a/bundle/direct/dresources/app_test.go +++ b/bundle/direct/dresources/app_test.go @@ -17,47 +17,6 @@ import ( // an app already exists but is in DELETING state. func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { server := testserver.New(t) - - createCallCount := 0 - getCallCount := 0 - - server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any { - createCallCount++ - if createCallCount == 1 { - return testserver.Response{ - StatusCode: 409, - Body: map[string]string{ - "error_code": "RESOURCE_ALREADY_EXISTS", - "message": "An app with the same name already exists.", - }, - } - } - return apps.App{ - Name: "test-app", - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } - }) - - server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { - getCallCount++ - if getCallCount == 1 { - return apps.App{ - Name: req.Vars["name"], - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateDeleting, - }, - } - } - return apps.App{ - Name: req.Vars["name"], - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } - }) - testserver.AddDefaultHandlers(server) client, err := databricks.NewWorkspaceClient(&databricks.Config{ @@ -66,14 +25,21 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { }) require.NoError(t, err) - r := (&ResourceApp{}).New(client) ctx := t.Context() + + // Create then delete an app to put it in DELETING state. + // The testserver's DELETE is asynchronous: it sets DELETING rather than + // removing immediately, so the retry create will find the app in that state. + _, err = client.Apps.Create(ctx, apps.CreateAppRequest{App: apps.App{Name: "test-app"}}) + require.NoError(t, err) + _, err = client.Apps.DeleteByName(ctx, "test-app") + require.NoError(t, err) + + r := (&ResourceApp{}).New(client) name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) - assert.Equal(t, 2, createCallCount, "expected Create to be called twice (1 retry)") - assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } // TestAppDoCreate_RetriesWhenGetReturnsNotFound verifies that DoCreate retries @@ -81,45 +47,20 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { server := testserver.New(t) - createCallCount := 0 - getCallCount := 0 - + // Simulate a race: the app existed when Create was called (returns 409) but + // was deleted before the existence check (GET returns 404). The first POST + // returns 409 without storing anything so the standard GET handler returns + // 404 naturally, and the retry POST creates the app normally. + rejectedOnce := false server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any { - createCallCount++ - if createCallCount == 1 { + if !rejectedOnce { + rejectedOnce = true return testserver.Response{ StatusCode: 409, - Body: map[string]string{ - "error_code": "RESOURCE_ALREADY_EXISTS", - "message": "An app with the same name already exists.", - }, + Body: map[string]string{"error_code": "RESOURCE_ALREADY_EXISTS", "message": "An app with the same name already exists."}, } } - return apps.App{ - Name: "test-app", - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } - }) - - server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any { - getCallCount++ - if getCallCount == 1 { - return testserver.Response{ - StatusCode: 404, - Body: map[string]string{ - "error_code": "RESOURCE_DOES_NOT_EXIST", - "message": "App not found.", - }, - } - } - return apps.App{ - Name: req.Vars["name"], - ComputeStatus: &apps.ComputeStatus{ - State: apps.ComputeStateActive, - }, - } + return req.Workspace.AppsUpsert(req, "") }) testserver.AddDefaultHandlers(server) @@ -136,8 +77,6 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { require.NoError(t, err) assert.Equal(t, "test-app", name) - assert.Equal(t, 2, createCallCount, "expected Create to be called twice") - assert.Equal(t, 2, getCallCount, "expected Get to be called twice: once to check app state, once by waitForApp") } func TestAppDoUpdate_UpdateMaskHasAllFields(t *testing.T) { diff --git a/libs/testserver/apps.go b/libs/testserver/apps.go index b35632e27ad..33ba5e2b6c2 100644 --- a/libs/testserver/apps.go +++ b/libs/testserver/apps.go @@ -72,6 +72,42 @@ func (s *FakeWorkspace) AppsCreateUpdate(req Request, name string) Response { } } +func (s *FakeWorkspace) AppsGet(_ Request, name string) Response { + defer s.LockUnlock()() + + app, ok := s.Apps[name] + if !ok { + return Response{StatusCode: 404, Body: map[string]string{"message": "App not found: " + name}} + } + + // When an app is in DELETING state, remove it from the store after returning. + // This simulates deletion completing so the next create attempt succeeds. + if app.ComputeStatus != nil && app.ComputeStatus.State == apps.ComputeStateDeleting { + delete(s.Apps, name) + } + + return Response{Body: app} +} + +// AppsDelete marks an app as DELETING, simulating asynchronous deletion. +// The app is removed from the store when it is next retrieved via AppsGet. +func (s *FakeWorkspace) AppsDelete(_ Request, name string) Response { + defer s.LockUnlock()() + + app, ok := s.Apps[name] + if !ok { + return Response{StatusCode: 404} + } + + app.ComputeStatus = &apps.ComputeStatus{ + State: apps.ComputeStateDeleting, + Message: "App compute is being deleted.", + } + s.Apps[name] = app + + return Response{} +} + func (s *FakeWorkspace) AppsGetUpdate(_ Request, name string) Response { defer s.LockUnlock()() diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index a5dea385abe..36db891e8bc 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -404,7 +404,7 @@ func AddDefaultHandlers(server *Server) { }) server.Handle("GET", "/api/2.0/apps/{name}", func(req Request) any { - return MapGet(req.Workspace, req.Workspace.Apps, req.Vars["name"]) + return req.Workspace.AppsGet(req, req.Vars["name"]) }) server.Handle("POST", "/api/2.0/apps", func(req Request) any { @@ -416,7 +416,7 @@ func AddDefaultHandlers(server *Server) { }) server.Handle("DELETE", "/api/2.0/apps/{name}", func(req Request) any { - return MapDelete(req.Workspace, req.Workspace.Apps, req.Vars["name"]) + return req.Workspace.AppsDelete(req, req.Vars["name"]) }) // Schemas: diff --git a/libs/testserver/server.go b/libs/testserver/server.go index eb0268d694f..c8ae59d539b 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -296,6 +296,12 @@ Response.Body = '' return s } +// GetWorkspace returns (creating if necessary) the FakeWorkspace for the given token. +// Use this in tests to pre-seed state before making requests. +func (s *Server) GetWorkspace(token string) *FakeWorkspace { + return s.getWorkspaceForToken(token) +} + func (s *Server) getWorkspaceForToken(token string) *FakeWorkspace { if token == "" { return nil From c30f8d14797468962cbfa13f6aa02ad3d0452ca5 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 10:52:36 +0200 Subject: [PATCH 03/21] testserver: remove unused GetWorkspace method Co-authored-by: Denis Bilenko --- libs/testserver/server.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libs/testserver/server.go b/libs/testserver/server.go index c8ae59d539b..eb0268d694f 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -296,12 +296,6 @@ Response.Body = '' return s } -// GetWorkspace returns (creating if necessary) the FakeWorkspace for the given token. -// Use this in tests to pre-seed state before making requests. -func (s *Server) GetWorkspace(token string) *FakeWorkspace { - return s.getWorkspaceForToken(token) -} - func (s *Server) getWorkspaceForToken(token string) *FakeWorkspace { if token == "" { return nil From da50fee19b81a9db59fbd8566892bd8c67e9ec34 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 15:05:38 +0200 Subject: [PATCH 04/21] direct: simplify Engine API: SaveState(id, state) replaces SetID+SaveState Collapse the two-step engine.SetID(id) / engine.SaveState(state) pattern into a single engine.SaveState(id, state) call. The Engine still tracks the id internally and panics if a subsequent call passes a different id (guards against bugs). Also extract the polling loop from vector_search_index.DoCreate into a private waitForIndexReady helper so that the unchanged DoDelete stays between the two functions and does not appear in the diff. Co-authored-by: Denis Bilenko --- bundle/direct/apply.go | 2 - bundle/direct/dresources/adapter.go | 2 +- bundle/direct/dresources/app.go | 4 +- bundle/direct/dresources/cluster.go | 3 +- bundle/direct/dresources/database_instance.go | 3 +- bundle/direct/dresources/engine.go | 19 +++----- .../dresources/model_serving_endpoint.go | 3 +- .../dresources/vector_search_endpoint.go | 3 +- .../direct/dresources/vector_search_index.go | 43 +++++++++++-------- 9 files changed, 40 insertions(+), 42 deletions(-) diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index b9db11fa6fa..3380e5018b6 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -129,8 +129,6 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, engine := dresources.NewEngine(d.Adapter.StateType(), func(_ string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) - engine.SetID(id) - remoteState, err := retryOnTransient(ctx, func() (any, error) { return d.Adapter.DoUpdate(ctx, engine, id, newState, planEntry) }) diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index a29a094cc78..0e273545157 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -53,7 +53,7 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Call engine.SetID then engine.SaveState to persist intermediate state before long-running waits. + // Call engine.SaveState(id, state) to persist intermediate state before long-running waits. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index e36036d28b0..bceace5284b 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -156,8 +156,8 @@ func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppS // Save state as soon as the app exists so it is not orphaned if the wait or // lifecycle management is interrupted. - engine.SetID(app.Name) - if err := engine.SaveState(config); err != nil { + + if err := engine.SaveState(app.Name, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index c16cf341e70..45aaa16b1bc 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -156,8 +156,7 @@ func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config * // Save state immediately after the cluster is created so it is not orphaned // if the subsequent wait or terminate is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index 35251eb526c..3f0dc827fab 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -36,8 +36,7 @@ func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, // Save state immediately after the instance is created so it is not orphaned // if the subsequent wait is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go index c4c8be8e25f..1e9bdd7b28c 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/engine.go @@ -1,7 +1,6 @@ package dresources import ( - "errors" "fmt" "reflect" ) @@ -26,18 +25,14 @@ func NewNopEngine(stateType reflect.Type) *Engine { return NewEngine(stateType, func(_ string, _ any) error { return nil }) } -// SetID sets the resource id for subsequent SaveState calls. -// Must be called before SaveState during DoCreate; for DoUpdate the Engine is -// pre-configured with the existing id. -func (e *Engine) SetID(id string) { - e.id = id -} - -// SaveState saves the resource state. x must be of the same pointer-to-struct -// type as the resource's state type. Returns an error if SetID was not called. -func (e *Engine) SaveState(x any) error { +// SaveState saves the resource state. id must be the resource's identifier; on +// the first call it is recorded, and subsequent calls panic if a different id is +// passed. x must be a pointer to the same struct type as the resource's state. +func (e *Engine) SaveState(id string, x any) error { if e.id == "" { - return errors.New("SaveState: id not set, call SetID first") + e.id = id + } else if e.id != id { + panic(fmt.Sprintf("SaveState: id mismatch: expected %q, got %q", e.id, id)) } xt := reflect.TypeOf(x) if xt != e.stateType { diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index 3ba258d1090..94d7fe8ee6f 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -137,8 +137,7 @@ func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 70f06891a4d..001df65e9c3 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -74,8 +74,7 @@ func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - engine.SetID(id) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(id, config); err != nil { return "", nil, err } diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index 0bba784a90a..68e6a86e85e 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -138,15 +138,34 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine // Save state immediately after the index is created (endpoint UUID now set) so it // is not orphaned if the subsequent provisioning wait is interrupted. - engine.SetID(config.Name) - if err := engine.SaveState(config); err != nil { + if err := engine.SaveState(config.Name, config); err != nil { return "", nil, err } - // CreateIndex returns immediately; poll until the embedding pipeline is ready so - // dependent resources and the next plan see a usable index. + remote, err := r.waitForIndexReady(ctx, config.Name, endpointUuid) + if err != nil { + return "", nil, err + } + return config.Name, remote, nil +} + +// No DoUpdate: vector search indexes have no update API. All SDK fields are +// declared in resources.yml under recreate_on_changes or ignore_remote_changes. +// If a future SDK bump adds a new field that isn't classified, the framework +// rejects the resulting Update plan at bundle_plan.go (see also the reflection +// test in vector_search_index_test.go which catches it earlier at unit-test time). + +func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { + return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) +} + +// waitForIndexReady polls GetIndex until Status.Ready=true. CreateIndex returns +// immediately with metadata of an index whose embedding pipeline is still +// provisioning; queries against an index that isn't ready fail. Blocking here +// lets dependent resources (and the next plan) see a usable index. +func (r *ResourceVectorSearchIndex) waitForIndexReady(ctx context.Context, id, endpointUuid string) (*VectorSearchIndexRemote, error) { index, err := retries.Poll(ctx, createIndexTimeout, func() (*vectorsearch.VectorIndex, *retries.Err) { - idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, config.Name) + idx, getErr := r.client.VectorSearchIndexes.GetIndexByIndexName(ctx, id) if getErr != nil { return nil, retries.Halt(getErr) } @@ -160,19 +179,9 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine return idx, nil }) if err != nil { - return "", nil, err + return nil, err } - return config.Name, &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil -} - -// No DoUpdate: vector search indexes have no update API. All SDK fields are -// declared in resources.yml under recreate_on_changes or ignore_remote_changes. -// If a future SDK bump adds a new field that isn't classified, the framework -// rejects the resulting Update plan at bundle_plan.go (see also the reflection -// test in vector_search_index_test.go which catches it earlier at unit-test time). - -func (r *ResourceVectorSearchIndex) DoDelete(ctx context.Context, id string, _ *VectorSearchIndexState) error { - return r.client.VectorSearchIndexes.DeleteIndexByIndexName(ctx, id) + return &VectorSearchIndexRemote{VectorIndex: *index, EndpointUuid: endpointUuid}, nil } // WaitAfterDelete polls GetIndex until it returns 404. The DELETE call is From 966b6596668c104f5b9f3c586725b79b8c0f5ed8 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 15:53:22 +0200 Subject: [PATCH 05/21] direct: Engine.SaveState takes ctx, returns void; logs I/O failures internally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resource implementations cannot recover from a state-persistence failure — the resource already exists on the server and aborting would not undo its creation. Log the error via logdiag instead of propagating it, and drop the error return so call sites are a single statement. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/adapter.go | 2 +- bundle/direct/dresources/app.go | 4 +--- bundle/direct/dresources/cluster.go | 4 +--- bundle/direct/dresources/database_instance.go | 4 +--- bundle/direct/dresources/engine.go | 13 ++++++++++--- bundle/direct/dresources/model_serving_endpoint.go | 4 +--- bundle/direct/dresources/vector_search_endpoint.go | 4 +--- bundle/direct/dresources/vector_search_index.go | 4 +--- 8 files changed, 17 insertions(+), 22 deletions(-) diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 0e273545157..cfcd675f9b2 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -53,7 +53,7 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Call engine.SaveState(id, state) to persist intermediate state before long-running waits. + // Call engine.SaveState(ctx, id, state) to persist intermediate state before long-running waits. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index bceace5284b..f82de0e2363 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -157,9 +157,7 @@ func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppS // Save state as soon as the app exists so it is not orphaned if the wait or // lifecycle management is interrupted. - if err := engine.SaveState(app.Name, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, app.Name, config) remote, err := r.waitForApp(ctx, r.client, config.Name) if err != nil { diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index 45aaa16b1bc..e62a8e99775 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -156,9 +156,7 @@ func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config * // Save state immediately after the cluster is created so it is not orphaned // if the subsequent wait or terminate is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) // Always wait for RUNNING first: clusters start in PENDING state and must be polled. _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index 3f0dc827fab..c66a0bd092a 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -36,9 +36,7 @@ func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, // Save state immediately after the instance is created so it is not orphaned // if the subsequent wait is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) waiterObj := &database.WaitGetDatabaseInstanceDatabaseAvailable[database.DatabaseInstance]{ Response: config, diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go index 1e9bdd7b28c..46deca6600d 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/engine.go @@ -1,8 +1,11 @@ package dresources import ( + "context" "fmt" "reflect" + + "github.com/databricks/cli/libs/logdiag" ) // Engine provides state persistence to resource implementations. @@ -28,7 +31,9 @@ func NewNopEngine(stateType reflect.Type) *Engine { // SaveState saves the resource state. id must be the resource's identifier; on // the first call it is recorded, and subsequent calls panic if a different id is // passed. x must be a pointer to the same struct type as the resource's state. -func (e *Engine) SaveState(id string, x any) error { +// Failures to persist state are logged but do not abort the deployment — the +// resource already exists and aborting would not undo its creation. +func (e *Engine) SaveState(ctx context.Context, id string, x any) { if e.id == "" { e.id = id } else if e.id != id { @@ -36,7 +41,9 @@ func (e *Engine) SaveState(id string, x any) error { } xt := reflect.TypeOf(x) if xt != e.stateType { - return fmt.Errorf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt) + panic(fmt.Sprintf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt)) + } + if err := e.saveFunc(e.id, x); err != nil { + logdiag.LogError(ctx, err) } - return e.saveFunc(e.id, x) } diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index 94d7fe8ee6f..091effa0631 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -137,9 +137,7 @@ func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) remote, err := r.waitForEndpointReady(ctx, config.Name) return id, remote, err diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 001df65e9c3..719901b79c3 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -74,9 +74,7 @@ func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Eng // Save state immediately after the endpoint is created so it is not orphaned // if the subsequent wait is interrupted. - if err := engine.SaveState(id, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, id, config) info, err := r.client.VectorSearchEndpoints.WaitGetEndpointVectorSearchEndpointOnline(ctx, config.Name, 60*time.Minute, nil) if err != nil { diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index 68e6a86e85e..ce796bc9f93 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -138,9 +138,7 @@ func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine // Save state immediately after the index is created (endpoint UUID now set) so it // is not orphaned if the subsequent provisioning wait is interrupted. - if err := engine.SaveState(config.Name, config); err != nil { - return "", nil, err - } + engine.SaveState(ctx, config.Name, config) remote, err := r.waitForIndexReady(ctx, config.Name, endpointUuid) if err != nil { From d851cdd601f2f3de5a764cd1059006dc7ee09920 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Mon, 1 Jun 2026 17:13:31 +0200 Subject: [PATCH 06/21] direct: save state before wait in postgres and dashboard resources All six resources with a gap between resource creation and a subsequent long-running wait now call engine.SaveState immediately after the create API returns, using waiter.Name() (available before Wait()) for the postgres resources and createResp.DashboardId for the dashboard. This ensures an interrupted deployment leaves a tracked resource rather than an orphan the next plan must rediscover from remote state. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/dashboard.go | 3 ++- bundle/direct/dresources/postgres_branch.go | 3 ++- bundle/direct/dresources/postgres_catalog.go | 3 ++- bundle/direct/dresources/postgres_endpoint.go | 3 ++- bundle/direct/dresources/postgres_project.go | 3 ++- bundle/direct/dresources/postgres_synced_table.go | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 71d7b1f9218..e0917c490b1 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -264,7 +264,7 @@ func responseToState(createOrUpdateResp *dashboards.Dashboard, publishResp *dash } } -func (r *ResourceDashboard) DoCreate(ctx context.Context, _ *Engine, config *DashboardState) (string, *DashboardState, error) { +func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config *DashboardState) (string, *DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return "", nil, err @@ -299,6 +299,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, _ *Engine, config *Das // Persist the etag in state. config.Etag = createResp.Etag + engine.SaveState(ctx, createResp.DashboardId, config) var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). diff --git a/bundle/direct/dresources/postgres_branch.go b/bundle/direct/dresources/postgres_branch.go index d53ac29dade..784daddb7fa 100644 --- a/bundle/direct/dresources/postgres_branch.go +++ b/bundle/direct/dresources/postgres_branch.go @@ -102,7 +102,7 @@ func (r *ResourcePostgresBranch) DoRead(ctx context.Context, id string) (*Postgr return makePostgresBranchRemote(branch), nil } -func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, engine *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { waiter, err := r.client.Postgres.CreateBranch(ctx, postgres.CreateBranchRequest{ BranchId: config.BranchId, Parent: config.Parent, @@ -124,6 +124,7 @@ func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) // Wait for the branch to be ready (long-running operation) result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_catalog.go b/bundle/direct/dresources/postgres_catalog.go index f8d3f51f6d3..b4574307118 100644 --- a/bundle/direct/dresources/postgres_catalog.go +++ b/bundle/direct/dresources/postgres_catalog.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresCatalog) DoRead(ctx context.Context, id string) (*Postg return makePostgresCatalogRemote(catalog), nil } -func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { +func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, engine *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { waiter, err := r.client.Postgres.CreateCatalog(ctx, postgres.CreateCatalogRequest{ CatalogId: config.CatalogId, Catalog: postgres.Catalog{ @@ -109,6 +109,7 @@ func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, confi if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) result, err := waiter.Wait(ctx) if err != nil { diff --git a/bundle/direct/dresources/postgres_endpoint.go b/bundle/direct/dresources/postgres_endpoint.go index 91e43df1028..62d10cead17 100644 --- a/bundle/direct/dresources/postgres_endpoint.go +++ b/bundle/direct/dresources/postgres_endpoint.go @@ -141,7 +141,7 @@ func (r *ResourcePostgresEndpoint) waitForReconciliation(ctx context.Context, na } } -func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, engine *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { waiter, err := r.client.Postgres.CreateEndpoint(ctx, postgres.CreateEndpointRequest{ EndpointId: config.EndpointId, Parent: config.Parent, @@ -163,6 +163,7 @@ func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, conf if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) // Wait for the operation to complete result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_project.go b/bundle/direct/dresources/postgres_project.go index 35f223a534d..fcc9e24f9c7 100644 --- a/bundle/direct/dresources/postgres_project.go +++ b/bundle/direct/dresources/postgres_project.go @@ -106,7 +106,7 @@ func (r *ResourcePostgresProject) DoRead(ctx context.Context, id string) (*Postg return makePostgresProjectRemote(project), nil } -func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoCreate(ctx context.Context, engine *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { waiter, err := r.client.Postgres.CreateProject(ctx, postgres.CreateProjectRequest{ ProjectId: config.ProjectId, Project: postgres.Project{ @@ -127,6 +127,7 @@ func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, confi if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) // Wait for the project to be ready (long-running operation) result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_synced_table.go b/bundle/direct/dresources/postgres_synced_table.go index 22f3c4d89c1..d90f8fc89b6 100644 --- a/bundle/direct/dresources/postgres_synced_table.go +++ b/bundle/direct/dresources/postgres_synced_table.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresSyncedTable) DoRead(ctx context.Context, id string) (*P return makePostgresSyncedTableRemote(syncedTable), nil } -func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { +func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, engine *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { waiter, err := r.client.Postgres.CreateSyncedTable(ctx, postgres.CreateSyncedTableRequest{ SyncedTableId: config.SyncedTableId, SyncedTable: postgres.SyncedTable{ @@ -108,6 +108,7 @@ func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, c if err != nil { return "", nil, err } + engine.SaveState(ctx, waiter.Name(), config) result, err := waiter.Wait(ctx) if err != nil { From 7ba5d9500e73918e123bc0e054c0785c834db636 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 15:31:20 +0200 Subject: [PATCH 07/21] direct: save state before publishing dashboard; add acceptance tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dashboard.DoCreate now calls engine.SaveState immediately after the dashboard is created (with etag persisted), before publishDashboard. A failed publish leaves the draft tracked in state rather than orphaned; the next deploy finds it via DoRead and re-publishes via DoUpdate without recreating the dashboard. The old trash-on-publish-failure cleanup is removed — it was a fragile workaround for the lack of state persistence and is now unnecessary. Acceptance tests: - publish-failure-cleans-up-dashboard: updated to reflect the new behavior per engine (direct: draft persists with URL; terraform: existing behavior, cleaned up). Output files split to per-engine variants (out.summary.*.txt, out.dashboardrequests.*.txt). - publish-failure-retry (new, direct only): verifies end-to-end that a transient publish failure leaves the draft in state (summary shows URL, plan detects diff), and the subsequent deploy re-publishes without issuing a CREATE call. Co-authored-by: Denis Bilenko --- .../out.dashboardrequests.direct.txt | 39 +++++++++++ ...xt => out.dashboardrequests.terraform.txt} | 0 .../out.deploy.direct.txt | 1 + .../out.summary.direct.txt | 12 ++++ .../out.summary.terraform.txt | 12 ++++ .../output.txt | 12 ---- .../script | 8 +-- .../dashboard.lvdash.json | 1 + .../publish-failure-retry/databricks.yml.tmpl | 9 +++ .../publish-failure-retry/out.test.toml | 4 ++ .../publish-failure-retry/output.txt | 64 +++++++++++++++++++ .../dashboards/publish-failure-retry/script | 30 +++++++++ .../publish-failure-retry/test.toml | 12 ++++ bundle/direct/dresources/dashboard.go | 13 +--- 14 files changed, 191 insertions(+), 26 deletions(-) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt rename acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/{out.dashboardrequests.txt => out.dashboardrequests.terraform.txt} (100%) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/script create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt new file mode 100644 index 00000000000..3e94771bdac --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.direct.txt @@ -0,0 +1,39 @@ +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/artifacts/.internal" + } +} +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/files" + } +} +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/resources" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards", + "body": { + "display_name": "my dashboard", + "parent_path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default/resources", + "serialized_dashboard": "{\"pages\":[{\"name\":\"test-page\",\"displayName\":\"Test Dashboard\"}]}\n", + "warehouse_id": "doesnotexist" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", + "body": { + "embed_credentials": false, + "warehouse_id": "doesnotexist" + } +} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.terraform.txt similarity index 100% rename from acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.txt rename to acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.dashboardrequests.terraform.txt diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt index 705bd09cb32..84918b848bf 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.deploy.direct.txt @@ -9,5 +9,6 @@ HTTP Status: 400 Bad Request API error_code: RESOURCE_DOES_NOT_EXIST API message: Warehouse doesnotexist does not exist +Updating deployment state... Exit code: 1 diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt new file mode 100644 index 00000000000..52c168b5302 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle summary +Name: publish-failure-cleans-up-dashboard +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default +Resources: + Dashboards: + dashboard1: + Name: my dashboard + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt new file mode 100644 index 00000000000..37d00329c77 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.terraform.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle summary +Name: publish-failure-cleans-up-dashboard +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default +Resources: + Dashboards: + dashboard1: + Name: my dashboard + URL: (not deployed) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt index 37d00329c77..e69de29bb2d 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/output.txt @@ -1,12 +0,0 @@ - ->>> [CLI] bundle summary -Name: publish-failure-cleans-up-dashboard -Target: default -Workspace: - User: [USERNAME] - Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-cleans-up-dashboard/default -Resources: - Dashboards: - dashboard1: - Name: my dashboard - URL: (not deployed) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script index 693a4320211..e8062b99e6e 100755 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script @@ -4,9 +4,9 @@ envsubst < databricks.yml.tmpl > databricks.yml # Deploy the dashboard. The dashboard will be created but publish will fail because the warehouse does not exist. errcode trace $CLI bundle deploy &> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt -trace $CLI bundle summary +# After publish failure the dashboard draft should be in state (direct) or cleaned up (terraform). +trace $CLI bundle summary &>> out.summary.$DATABRICKS_BUNDLE_ENGINE.txt -# API should record a DELETE call to clean up the draft dashboard that was not published. -# Request sequence is identical across terraform and direct modes. +# API request sequence differs between engines (direct: no DELETE; terraform: DELETE to clean up). unset MSYS_NO_PATHCONV -print_requests.py //lakeview/dashboards //workspace/mkdirs > out.dashboardrequests.txt +print_requests.py //lakeview/dashboards //workspace/mkdirs > out.dashboardrequests.$DATABRICKS_BUNDLE_ENGINE.txt diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json b/acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json new file mode 100644 index 00000000000..0bfc5797ff0 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/dashboard.lvdash.json @@ -0,0 +1 @@ +{"pages":[{"name":"test-page","displayName":"Test Dashboard"}]} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl b/acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl new file mode 100644 index 00000000000..553305fbfde --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/databricks.yml.tmpl @@ -0,0 +1,9 @@ +bundle: + name: publish-failure-retry + +resources: + dashboards: + dashboard1: + display_name: my dashboard + warehouse_id: someid + file_path: ./dashboard.lvdash.json diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml new file mode 100644 index 00000000000..a29f11b9ab2 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = false +RequiresWarehouse = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt new file mode 100644 index 00000000000..05fd57f6c99 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -0,0 +1,64 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/files... +Deploying resources... +Error: cannot create resources.dashboards.dashboard1: Fault injected by test. (400 INJECTED) + +Endpoint: POST [DATABRICKS_URL]/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published +HTTP Status: 400 Bad Request +API error_code: INJECTED +API message: Fault injected by test. + +Updating deployment state... + +Exit code: 1 + +>>> [CLI] bundle summary +Name: publish-failure-retry +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default +Resources: + Dashboards: + dashboard1: + Name: my dashboard + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] + +>>> [CLI] bundle plan +update dashboards.dashboard1 + +Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! +{ + "method": "PATCH", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]", + "body": { + "display_name": "my dashboard", + "parent_path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/resources", + "serialized_dashboard": "{\"pages\":[{\"name\":\"test-page\",\"displayName\":\"Test Dashboard\"}]}\n", + "warehouse_id": "someid" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", + "body": { + "embed_credentials": false, + "warehouse_id": "someid" + } +} + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.dashboards.dashboard1 + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script new file mode 100644 index 00000000000..3b430628865 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -0,0 +1,30 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve + rm -f out.requests.txt +} +trap cleanup EXIT + +# Inject a single publish failure so the first deploy creates the dashboard +# draft but fails to publish it. +fault.py "POST /api/2.0/lakeview/dashboards/*" 400 0 1 + +# First deploy: dashboard is created and saved to state, but publish fails. +errcode trace $CLI bundle deploy + +# Dashboard should be in state (tracked) despite the publish failure. +trace $CLI bundle summary + +# Plan should show that publishing is still needed. +trace $CLI bundle plan + +# Discard first-deploy requests so the output only contains second-deploy +# calls, making it easy to confirm no CREATE was issued. +rm out.requests.txt + +# Second deploy: fault is gone; must publish the existing draft, not create a new one. +trace $CLI bundle deploy + +# Confirm: second deploy issued an UPDATE and a PUBLISH call but no CREATE. +print_requests.py //lakeview/dashboards diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml new file mode 100644 index 00000000000..ac1be44882d --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml @@ -0,0 +1,12 @@ +Cloud = false +Local = true +RecordRequests = true + +# Only run with the direct engine: the test verifies direct engine's SaveState +# behavior (draft persists on publish failure and is re-published on retry). +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct"] + +[[Repls]] +Old = "[0-9a-f]{32}" +New = "[DASHBOARD_ID]" diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index e0917c490b1..995b5e2f348 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -10,7 +10,6 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/deployplan" - "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/structs/structpath" "github.com/databricks/cli/libs/utils" "github.com/databricks/databricks-sdk-go" @@ -299,6 +298,9 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config // Persist the etag in state. config.Etag = createResp.Etag + // Save state before publishing so an interrupted publish leaves a tracked + // draft rather than an orphan. The next deploy finds the draft in state and + // re-publishes via DoUpdate without recreating the dashboard. engine.SaveState(ctx, createResp.DashboardId, config) var publishResp *dashboards.PublishedDashboard @@ -306,16 +308,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config if config.Published { publishResp, err = r.publishDashboard(ctx, createResp.DashboardId, config) if err != nil { - // If the publish fails, we should delete the dashboard to avoid leaving it in a bad state. - deleteErr := r.client.Lakeview.Trash(ctx, dashboards.TrashDashboardRequest{ - DashboardId: createResp.DashboardId, - }) - if deleteErr != nil { - log.Warnf(ctx, "failed to delete draft dashboard %s after publish failed: %v", createResp.DashboardId, deleteErr) - return "", nil, deleteErr - } return "", nil, err - // QQQ: instead, we could store partial state with published=false } } From 21c9e21f63e60e4549dff8d9aeb943ad0617eeb7 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 22:04:04 +0200 Subject: [PATCH 08/21] acceptance/dashboards: normalize ?o=/?w= workspace param in URL replacements The local testserver uses ?o= and cloud uses ?w= for the workspace/org ID in dashboard published URLs. Add a parent-level [[Repls]] rule that maps both to ?[WSPARAM]= so the output files are environment-independent. Co-authored-by: Denis Bilenko --- .../out.summary.direct.txt | 2 +- .../resources/dashboards/publish-failure-retry/output.txt | 2 +- acceptance/bundle/resources/dashboards/test.toml | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt index 52c168b5302..39ec72db595 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/out.summary.direct.txt @@ -9,4 +9,4 @@ Resources: Dashboards: dashboard1: Name: my dashboard - URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt index 05fd57f6c99..f5ab0131d7d 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -23,7 +23,7 @@ Resources: Dashboards: dashboard1: Name: my dashboard - URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?o=[NUMID] + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] >>> [CLI] bundle plan update dashboards.dashboard1 diff --git a/acceptance/bundle/resources/dashboards/test.toml b/acceptance/bundle/resources/dashboards/test.toml index d932390098f..ff86f677620 100644 --- a/acceptance/bundle/resources/dashboards/test.toml +++ b/acceptance/bundle/resources/dashboards/test.toml @@ -19,3 +19,8 @@ New = "[ETAG]" [[Repls]] Old = "\"[0-9]{8,}\"" New = "[ETAG]" + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = "\\?(o|w)=" +New = "?[WSPARAM]=" From da308cbf7d42469dc7bdf0f1c87023e4bf13c0cc Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 22:20:39 +0200 Subject: [PATCH 09/21] acceptance/dashboards: fix ?o=/?w= workspace param normalization Use per-test [[Repls]] rules (matching raw digits) in the two publish-failure tests so the URL parameter is normalized to ?[WSPARAM]=[NUMID] regardless of whether the testserver or cloud environment is used. Revert the over-broad parent-level rule that broke detect-change's existing ?[ow]=... cleanup. Co-authored-by: Denis Bilenko --- .../dashboards/publish-failure-cleans-up-dashboard/test.toml | 5 +++++ .../resources/dashboards/publish-failure-retry/test.toml | 5 +++++ acceptance/bundle/resources/dashboards/test.toml | 5 ----- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml index a8ede7b099c..ed07dda2980 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/test.toml @@ -10,3 +10,8 @@ Response.Body = '{"error_code": "RESOURCE_DOES_NOT_EXIST", "message": "Warehouse [[Repls]] Old = "[0-9a-f]{32}" New = "[DASHBOARD_ID]" + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = '\?[ow]=\d+' +New = "?[WSPARAM]=[NUMID]" diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml index ac1be44882d..3d4dd86612e 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml @@ -10,3 +10,8 @@ DATABRICKS_BUNDLE_ENGINE = ["direct"] [[Repls]] Old = "[0-9a-f]{32}" New = "[DASHBOARD_ID]" + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = '\?[ow]=\d+' +New = "?[WSPARAM]=[NUMID]" diff --git a/acceptance/bundle/resources/dashboards/test.toml b/acceptance/bundle/resources/dashboards/test.toml index ff86f677620..d932390098f 100644 --- a/acceptance/bundle/resources/dashboards/test.toml +++ b/acceptance/bundle/resources/dashboards/test.toml @@ -19,8 +19,3 @@ New = "[ETAG]" [[Repls]] Old = "\"[0-9]{8,}\"" New = "[ETAG]" - -# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. -[[Repls]] -Old = "\\?(o|w)=" -New = "?[WSPARAM]=" From 4b584f19bc0f3ddb01131eeb86e2ad941c81b40d Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 2 Jun 2026 22:41:17 +0200 Subject: [PATCH 10/21] acceptance/dashboards: fix &>> redirect for bash 3.2 compatibility macOS ships bash 3.2 which does not support &>> (bash 4+). Replace with >> file 2>&1 which works on all bash versions. Co-authored-by: Denis Bilenko --- .../dashboards/publish-failure-cleans-up-dashboard/script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script index e8062b99e6e..e28caa3d0de 100755 --- a/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-cleans-up-dashboard/script @@ -5,7 +5,7 @@ envsubst < databricks.yml.tmpl > databricks.yml errcode trace $CLI bundle deploy &> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt # After publish failure the dashboard draft should be in state (direct) or cleaned up (terraform). -trace $CLI bundle summary &>> out.summary.$DATABRICKS_BUNDLE_ENGINE.txt +trace $CLI bundle summary >> out.summary.$DATABRICKS_BUNDLE_ENGINE.txt 2>&1 # API request sequence differs between engines (direct: no DELETE; terraform: DELETE to clean up). unset MSYS_NO_PATHCONV From e160704158eba8c93804382e2046deca3143e92e Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 3 Jun 2026 10:51:38 +0200 Subject: [PATCH 11/21] acceptance/dashboards: unset MSYS_NO_PATHCONV before fault.py on Windows MSYS_NO_PATHCONV=1 (set in the parent test.toml) prevents MSYS2 from converting POSIX paths to Windows paths, causing Python to receive a broken path (/c/a/... instead of C:\a\...) when invoking fault.py. Unset it before the fault.py call, matching the pattern used by other dashboard scripts before their bin helper invocations. Co-authored-by: Denis Bilenko --- .../bundle/resources/dashboards/publish-failure-retry/script | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script index 3b430628865..2096b92ee67 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -6,6 +6,10 @@ cleanup() { } trap cleanup EXIT +# unset MSYS_NO_PATHCONV so MSYS2 converts the script path to a Windows path +# when invoking the Python interpreter (required for fault.py to be found on Windows). +unset MSYS_NO_PATHCONV + # Inject a single publish failure so the first deploy creates the dashboard # draft but fails to publish it. fault.py "POST /api/2.0/lakeview/dashboards/*" 400 0 1 From 3f1195144b732263ca62d45efd2525b941e59736 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 3 Jun 2026 13:33:39 +0200 Subject: [PATCH 12/21] direct: Engine dedup+logging; dashboard saves draft state as Published=false MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Engine.SaveState: - Accepts resourceKey for logging: "SaveState: resources.X id=Y N bytes: {...}" - Skips WAL write if state is unchanged (structdiff.IsEqual), logging a skip message. - Records the last saved value in e.lastSaved for subsequent comparisons. Dashboard DoCreate: - Saves intermediate state with Published=false (the actual draft state) instead of the user's Published=true. This ensures the planner sees a real diff (false→true) on the next deploy if publish is interrupted, rather than treating the resource as up-to-date and silently skipping the publish. publish-failure-retry acceptance test: - Adds `bundle plan -o json` and extracts the published change to verify old=false, new=true in the plan after a failed publish. README.md: update stale SetID+SaveState reference to current SaveState(ctx, id, state) API. Co-authored-by: Denis Bilenko --- .../out.plan_published_change.json | 6 ++++ .../publish-failure-retry/output.txt | 11 +++++-- .../dashboards/publish-failure-retry/script | 6 ++-- bundle/direct/apply.go | 4 +-- bundle/direct/dresources/README.md | 2 +- bundle/direct/dresources/dashboard.go | 10 +++++-- bundle/direct/dresources/engine.go | 30 +++++++++++++++---- 7 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json new file mode 100644 index 00000000000..0a05a1b42d3 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json @@ -0,0 +1,6 @@ +{ + "action": "update", + "old": false, + "new": true, + "remote": false +} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt index f5ab0131d7d..b1f7b6bb3b3 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -25,10 +25,15 @@ Resources: Name: my dashboard URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] ->>> [CLI] bundle plan -update dashboards.dashboard1 +>>> [CLI] bundle plan -o json -Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged +>>> cat out.plan_published_change.json +{ + "action": "update", + "old": false, + "new": true, + "remote": false +} >>> [CLI] bundle deploy Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/files... diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script index 2096b92ee67..c231427120b 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -20,8 +20,10 @@ errcode trace $CLI bundle deploy # Dashboard should be in state (tracked) despite the publish failure. trace $CLI bundle summary -# Plan should show that publishing is still needed. -trace $CLI bundle plan +# Plan should show published: false (saved state) -> true (desired). +# Capture the Changes entry for 'published' to verify the diff direction. +trace $CLI bundle plan -o json | jq '.plan["resources.dashboards.dashboard1"].changes.published' > out.plan_published_change.json +trace cat out.plan_published_change.json # Discard first-deploy requests so the output only contains second-deploy # calls, making it easy to confirm no CREATE was issued. diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index 3380e5018b6..dd631d5e14a 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -51,7 +51,7 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { - engine := dresources.NewEngine(d.Adapter.StateType(), func(id string, x any) error { + engine := dresources.NewEngine(d.ResourceKey, d.Adapter.StateType(), func(id string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) @@ -126,7 +126,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("internal error: DoUpdate not implemented for resource %s", d.ResourceKey) } - engine := dresources.NewEngine(d.Adapter.StateType(), func(_ string, x any) error { + engine := dresources.NewEngine(d.ResourceKey, d.Adapter.StateType(), func(_ string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) remoteState, err := retryOnTransient(ctx, func() (any, error) { diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index b9c6e4754e2..1c2e44d68f0 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -36,7 +36,7 @@ If a resource has fields that must not be sent in updates (deploy-only, lifecycl ## Async APIs -For resources whose create or update is asynchronous, poll inline inside `DoCreate`/`DoUpdate` after the initial API call. To prevent orphaning if deployment is interrupted during a long wait, call `engine.SetID(id)` then `engine.SaveState(config)` immediately after the resource is created and before any waiting. The framework provides a `*Engine` as the second argument to both methods. +For resources whose create or update is asynchronous, poll inline inside `DoCreate`/`DoUpdate` after the initial API call. To prevent orphaning if deployment is interrupted during a long wait, call `engine.SaveState(ctx, id, config)` immediately after the resource is created and before any waiting. The framework provides a `*Engine` as the second argument to both methods. ## Slice ordering: KeyedSlices diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 995b5e2f348..36158237194 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -298,10 +298,14 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config // Persist the etag in state. config.Etag = createResp.Etag - // Save state before publishing so an interrupted publish leaves a tracked - // draft rather than an orphan. The next deploy finds the draft in state and - // re-publishes via DoUpdate without recreating the dashboard. + // Save state with Published=false: the dashboard exists as a draft; publish + // has not succeeded yet. Using Published=false ensures the planner sees a + // real diff (false→true) if publish is interrupted, triggering a DoUpdate + // on the next deploy instead of silently treating the resource as up-to-date. + savedPublished := config.Published + config.Published = false engine.SaveState(ctx, createResp.DashboardId, config) + config.Published = savedPublished var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/engine.go index 46deca6600d..0172f287ff1 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/engine.go @@ -2,35 +2,41 @@ package dresources import ( "context" + "encoding/json" "fmt" "reflect" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/cli/libs/structs/structdiff" ) // Engine provides state persistence to resource implementations. // Pass it to DoCreate or DoUpdate to save intermediate state before long-running // wait operations, so the resource is not orphaned if deployment is interrupted. type Engine struct { - id string - stateType reflect.Type - saveFunc func(id string, x any) error + resourceKey string + id string + stateType reflect.Type + saveFunc func(id string, x any) error + lastSaved any } // NewEngine creates an Engine with the given state type and save function. // The framework calls this before invoking DoCreate or DoUpdate. -func NewEngine(stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { - return &Engine{id: "", stateType: stateType, saveFunc: saveFunc} +func NewEngine(resourceKey string, stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { + return &Engine{resourceKey: resourceKey, id: "", stateType: stateType, saveFunc: saveFunc, lastSaved: nil} } // NewNopEngine creates an Engine that discards all saves. Use in tests. func NewNopEngine(stateType reflect.Type) *Engine { - return NewEngine(stateType, func(_ string, _ any) error { return nil }) + return NewEngine("", stateType, func(_ string, _ any) error { return nil }) } // SaveState saves the resource state. id must be the resource's identifier; on // the first call it is recorded, and subsequent calls panic if a different id is // passed. x must be a pointer to the same struct type as the resource's state. +// If the state is identical to what was last saved, the write is skipped. // Failures to persist state are logged but do not abort the deployment — the // resource already exists and aborting would not undo its creation. func (e *Engine) SaveState(ctx context.Context, id string, x any) { @@ -43,7 +49,19 @@ func (e *Engine) SaveState(ctx context.Context, id string, x any) { if xt != e.stateType { panic(fmt.Sprintf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt)) } + if e.lastSaved != nil && structdiff.IsEqual(e.lastSaved, x) { + log.Debugf(ctx, "SaveState: %s id=%s: skipping, state unchanged", e.resourceKey, id) + return + } + b, _ := json.Marshal(x) + preview := string(b) + if len(preview) > 100 { + preview = preview[:100] + } + log.Debugf(ctx, "SaveState: %s id=%s %d bytes: %s", e.resourceKey, id, len(b), preview) if err := e.saveFunc(e.id, x); err != nil { logdiag.LogError(ctx, err) + return } + e.lastSaved = x } From bf602f13ebcb9ba07ff96df3a52f697e57d058ea Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 3 Jun 2026 17:35:03 +0200 Subject: [PATCH 13/21] direct: inline WaitAfterCreate/WaitAfterUpdate into sql_warehouse DoCreate/DoUpdate Mergiraf reintroduced the standalone WaitAfterCreate and WaitAfterUpdate methods during the rebase against main (sql_warehouse.go had a merge conflict). Inline their wait logic directly into DoCreate and DoUpdate and remove the standalone methods, consistent with the Engine callback pattern used by other resources. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/sql_warehouse.go | 74 ++++++++++------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index 5dbd9d1d621..1225420febe 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -123,7 +123,31 @@ func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, _ *Engine, config * if err != nil { return "", nil, err } - return waiter.Id, nil, nil + id := waiter.Id + + if config.Lifecycle == nil || config.Lifecycle.Started == nil { + return id, nil, nil + } + + // Always wait for RUNNING first: warehouses start asynchronously. + _, err = r.client.Warehouses.WaitGetWarehouseRunning(ctx, id, 20*time.Minute, nil) + if err != nil { + return "", nil, err + } + + if !*config.Lifecycle.Started { + // started=false: stop the warehouse after it reaches RUNNING. + stopWaiter, err := r.client.Warehouses.Stop(ctx, sql.StopRequest{Id: id}) + if err != nil { + return "", nil, err + } + _, err = stopWaiter.Get() + if err != nil { + return "", nil, err + } + } + + return id, nil, nil } // hasWarehouseChanges reports whether the plan entry contains any Update changes @@ -170,59 +194,25 @@ func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, _ *Engine, id strin desiredStarted := *config.Lifecycle.Started alreadyRunning := remoteWarehouseIsRunning(entry) if desiredStarted && !alreadyRunning { - // lifecycle.started=true: fire Start; WaitAfterUpdate polls for RUNNING. _, err := r.client.Warehouses.Start(ctx, sql.StartRequest{Id: id}) - return nil, err + if err != nil { + return nil, err + } } else if !desiredStarted && alreadyRunning { - // lifecycle.started=false: fire Stop; WaitAfterUpdate polls for STOPPED. _, err := r.client.Warehouses.Stop(ctx, sql.StopRequest{Id: id}) - return nil, err - } - - return nil, nil -} - -// WaitAfterUpdate waits for the warehouse to reach the desired lifecycle state after DoUpdate. -func (r *ResourceSqlWarehouse) WaitAfterUpdate(ctx context.Context, id string, config *SqlWarehouseState) (*SqlWarehouseRemote, error) { - if config.Lifecycle == nil || config.Lifecycle.Started == nil { - return nil, nil + if err != nil { + return nil, err + } } - if *config.Lifecycle.Started { + if desiredStarted { _, err := r.client.Warehouses.WaitGetWarehouseRunning(ctx, id, 20*time.Minute, nil) return nil, err } - _, err := r.client.Warehouses.WaitGetWarehouseStopped(ctx, id, 20*time.Minute, nil) return nil, err } -// WaitAfterCreate waits for the warehouse to be ready, then stops it if lifecycle.started=false. -// Warehouses are created in a starting state; WaitGetWarehouseRunning waits for them to be RUNNING. -func (r *ResourceSqlWarehouse) WaitAfterCreate(ctx context.Context, id string, config *SqlWarehouseState) (*SqlWarehouseRemote, error) { - if config.Lifecycle == nil || config.Lifecycle.Started == nil { - return nil, nil - } - - // Always wait for RUNNING first: warehouses start asynchronously. - _, err := r.client.Warehouses.WaitGetWarehouseRunning(ctx, id, 20*time.Minute, nil) - if err != nil { - return nil, err - } - - if !*config.Lifecycle.Started { - // started=false: stop the warehouse after it reaches RUNNING. - stopWaiter, err := r.client.Warehouses.Stop(ctx, sql.StopRequest{Id: id}) - if err != nil { - return nil, err - } - _, err = stopWaiter.Get() - return nil, err - } - - return nil, nil -} - func (r *ResourceSqlWarehouse) DoDelete(ctx context.Context, oldID string, _ *SqlWarehouseState) error { return r.client.Warehouses.DeleteById(ctx, oldID) } From 51392cd5fa2397cc56f263810f967e09b58091c8 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 4 Jun 2026 18:50:06 +0200 Subject: [PATCH 14/21] acceptance/dashboards: use replace_ids.py and inline plan output in publish-failure-retry - Replace fixed hex regex [[Repls]] with replace_ids.py called after the first deploy; the real dashboard ID is captured from state as [DASHBOARD1_ID]. - Inline the plan -o json | jq output directly into output.txt instead of saving to out.plan_published_change.json and trace cat-ing it. Co-authored-by: Denis Bilenko --- .../out.plan_published_change.json | 6 ------ .../dashboards/publish-failure-retry/output.txt | 10 ++++------ .../resources/dashboards/publish-failure-retry/script | 7 ++++--- .../dashboards/publish-failure-retry/test.toml | 4 ---- 4 files changed, 8 insertions(+), 19 deletions(-) delete mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json b/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json deleted file mode 100644 index 0a05a1b42d3..00000000000 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/out.plan_published_change.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "action": "update", - "old": false, - "new": true, - "remote": false -} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt index b1f7b6bb3b3..aa00db78f6a 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/output.txt @@ -4,7 +4,7 @@ Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-re Deploying resources... Error: cannot create resources.dashboards.dashboard1: Fault injected by test. (400 INJECTED) -Endpoint: POST [DATABRICKS_URL]/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published +Endpoint: POST [DATABRICKS_URL]/api/2.0/lakeview/dashboards/[DASHBOARD1_ID]/published HTTP Status: 400 Bad Request API error_code: INJECTED API message: Fault injected by test. @@ -23,11 +23,9 @@ Resources: Dashboards: dashboard1: Name: my dashboard - URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD_ID]/published?[WSPARAM]=[NUMID] + URL: [DATABRICKS_URL]/dashboardsv3/[DASHBOARD1_ID]/published?[WSPARAM]=[NUMID] >>> [CLI] bundle plan -o json - ->>> cat out.plan_published_change.json { "action": "update", "old": false, @@ -42,7 +40,7 @@ Updating deployment state... Deployment complete! { "method": "PATCH", - "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD1_ID]", "body": { "display_name": "my dashboard", "parent_path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-retry/default/resources", @@ -52,7 +50,7 @@ Deployment complete! } { "method": "POST", - "path": "/api/2.0/lakeview/dashboards/[DASHBOARD_ID]/published", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD1_ID]/published", "body": { "embed_credentials": false, "warehouse_id": "someid" diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/script b/acceptance/bundle/resources/dashboards/publish-failure-retry/script index c231427120b..e776d2af933 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/script +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/script @@ -17,13 +17,14 @@ fault.py "POST /api/2.0/lakeview/dashboards/*" 400 0 1 # First deploy: dashboard is created and saved to state, but publish fails. errcode trace $CLI bundle deploy +# Capture the dashboard ID from state so subsequent output is normalized. +replace_ids.py + # Dashboard should be in state (tracked) despite the publish failure. trace $CLI bundle summary # Plan should show published: false (saved state) -> true (desired). -# Capture the Changes entry for 'published' to verify the diff direction. -trace $CLI bundle plan -o json | jq '.plan["resources.dashboards.dashboard1"].changes.published' > out.plan_published_change.json -trace cat out.plan_published_change.json +trace $CLI bundle plan -o json | jq '.plan["resources.dashboards.dashboard1"].changes.published' # Discard first-deploy requests so the output only contains second-deploy # calls, making it easy to confirm no CREATE was issued. diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml index 3d4dd86612e..177fe084644 100644 --- a/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry/test.toml @@ -7,10 +7,6 @@ RecordRequests = true [EnvMatrix] DATABRICKS_BUNDLE_ENGINE = ["direct"] -[[Repls]] -Old = "[0-9a-f]{32}" -New = "[DASHBOARD_ID]" - # Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. [[Repls]] Old = '\?[ow]=\d+' From 72e62da3ef11b4ac23803d3d05e8982ebfc858bc Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 15:03:08 +0200 Subject: [PATCH 15/21] direct: save state before wait/publish in sql_warehouse DoCreate and dashboard DoUpdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two more cases where state could be persisted earlier to avoid orphaning or stale-etag conflicts: sql_warehouse DoCreate: the warehouse is created, then DoCreate polls for RUNNING (and may Stop it). If interrupted during the wait, the warehouse was orphaned. Now SaveState is called right after Create returns the id, before the wait. dashboard DoUpdate: Update() bumps the server-side etag, then publishDashboard() can fail. Mirror the DoCreate fix — save the new etag with Published=false before publishing. This keeps the etag in sync (a stale etag makes the next Update fail with a conflict) and records published=false so the planner re-publishes next deploy. Resolves the pre-existing TODO. Add publish-failure-retry-on-update acceptance test: deploy, then trigger an update with an injected publish failure, and verify the update issued a PATCH (no CREATE) and the plan records published old=false. Co-authored-by: Denis Bilenko --- .../dashboard.lvdash.json | 1 + .../databricks.yml.tmpl | 9 +++ .../out.test.toml | 4 ++ .../output.txt | 56 +++++++++++++++++++ .../publish-failure-retry-on-update/script | 33 +++++++++++ .../publish-failure-retry-on-update/test.toml | 13 +++++ bundle/direct/dresources/dashboard.go | 12 +++- bundle/direct/dresources/sql_warehouse.go | 6 +- 8 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/dashboard.lvdash.json create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/databricks.yml.tmpl create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/out.test.toml create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/output.txt create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/script create mode 100644 acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/test.toml diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/dashboard.lvdash.json b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/dashboard.lvdash.json new file mode 100644 index 00000000000..0bfc5797ff0 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/dashboard.lvdash.json @@ -0,0 +1 @@ +{"pages":[{"name":"test-page","displayName":"Test Dashboard"}]} diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/databricks.yml.tmpl b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/databricks.yml.tmpl new file mode 100644 index 00000000000..de7419dcbdb --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/databricks.yml.tmpl @@ -0,0 +1,9 @@ +bundle: + name: publish-failure-retry-on-update + +resources: + dashboards: + dashboard1: + display_name: my dashboard + warehouse_id: someid + file_path: ./dashboard.lvdash.json diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/out.test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/out.test.toml new file mode 100644 index 00000000000..a29f11b9ab2 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = false +RequiresWarehouse = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/output.txt b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/output.txt new file mode 100644 index 00000000000..8017faf3041 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/output.txt @@ -0,0 +1,56 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry-on-update/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry-on-update/default/files... +Deploying resources... +Error: cannot update resources.dashboards.dashboard1: updating id=[DASHBOARD1_ID]: Fault injected by test. (400 INJECTED) + +Endpoint: POST [DATABRICKS_URL]/api/2.0/lakeview/dashboards/[DASHBOARD1_ID]/published +HTTP Status: 400 Bad Request +API error_code: INJECTED +API message: Fault injected by test. + +Updating deployment state... + +Exit code: 1 +{ + "method": "PATCH", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD1_ID]", + "body": { + "display_name": "my dashboard renamed", + "parent_path": "/Workspace/Users/[USERNAME]/.bundle/publish-failure-retry-on-update/default/resources", + "serialized_dashboard": "{\"pages\":[{\"name\":\"test-page\",\"displayName\":\"Test Dashboard\"}]}\n", + "warehouse_id": "someid" + } +} +{ + "method": "POST", + "path": "/api/2.0/lakeview/dashboards/[DASHBOARD1_ID]/published", + "body": { + "embed_credentials": false, + "warehouse_id": "someid" + } +} + +>>> [CLI] bundle plan -o json +{ + "action": "skip", + "reason": "remote_already_set", + "old": false, + "new": true, + "remote": true +} + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.dashboards.dashboard1 + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/publish-failure-retry-on-update/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/script b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/script new file mode 100644 index 00000000000..27471ef05f6 --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/script @@ -0,0 +1,33 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve + rm -f out.requests.txt +} +trap cleanup EXIT + +# unset MSYS_NO_PATHCONV so MSYS2 converts the script path to a Windows path +# when invoking the Python interpreter (required for fault.py to be found on Windows). +unset MSYS_NO_PATHCONV + +# First deploy succeeds fully: the dashboard is created and published. +trace $CLI bundle deploy +replace_ids.py +rm out.requests.txt + +# Inject a single publish failure for the update below. +fault.py "POST /api/2.0/lakeview/dashboards/*" 400 0 1 + +# Change the dashboard to trigger an Update, which bumps the server-side etag. +update_file.py databricks.yml "my dashboard" "my dashboard renamed" + +# Deploy: Update (PATCH) succeeds and bumps the etag, but the publish (POST) fails. +errcode trace $CLI bundle deploy + +# The failed deploy issued a PATCH (update) and a POST (publish), but no CREATE +# (POST /api/2.0/lakeview/dashboards): the existing dashboard was updated in place. +print_requests.py //lakeview/dashboards + +# Plan shows published old=false: state was saved with published=false (and the bumped +# etag) before the failed publish, so the next deploy knows publishing is still pending. +trace $CLI bundle plan -o json | jq '.plan["resources.dashboards.dashboard1"].changes.published' diff --git a/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/test.toml b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/test.toml new file mode 100644 index 00000000000..5272a2289ad --- /dev/null +++ b/acceptance/bundle/resources/dashboards/publish-failure-retry-on-update/test.toml @@ -0,0 +1,13 @@ +Cloud = false +Local = true +RecordRequests = true + +# Only run with the direct engine: the test verifies direct engine's SaveState +# behavior during DoUpdate (new etag + published=false persisted before a failed publish). +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct"] + +# Dashboard published URLs use ?o= (local testserver) or ?w= (cloud) for the workspace/org ID. +[[Repls]] +Old = '\?[ow]=\d+' +New = "?[WSPARAM]=[NUMID]" diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 36158237194..6410db50723 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -319,7 +319,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config return createResp.DashboardId, responseToState(createResp, publishResp, dashboard.SerializedDashboard, config.Published), nil } -func (r *ResourceDashboard) DoUpdate(ctx context.Context, _ *Engine, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { +func (r *ResourceDashboard) DoUpdate(ctx context.Context, engine *Engine, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return nil, err @@ -339,15 +339,21 @@ func (r *ResourceDashboard) DoUpdate(ctx context.Context, _ *Engine, id string, return nil, err } - // Persist the etag in state. + // Persist the new etag with Published=false before publishing. Update() bumps the + // etag on the server; if a subsequent publish fails, saving here keeps the etag in + // sync (a stale etag would make the next Update fail with a conflict) and records + // published=false so the planner re-publishes on the next deploy. config.Etag = updateResp.Etag + savedPublished := config.Published + config.Published = false + engine.SaveState(ctx, id, config) + config.Published = savedPublished var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). if config.Published { publishResp, err = r.publishDashboard(ctx, id, config) if err != nil { - // TODO: store partial state with published=false? return nil, err } } diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index 1225420febe..be7cb9b3212 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -118,13 +118,17 @@ func (r *ResourceSqlWarehouse) DoRead(ctx context.Context, id string) (*SqlWareh } // DoCreate creates the warehouse and returns its id. -func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, _ *Engine, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, engine *Engine, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { waiter, err := r.client.Warehouses.Create(ctx, config.CreateWarehouseRequest) if err != nil { return "", nil, err } id := waiter.Id + // Save state immediately after the warehouse is created so it is not orphaned + // if the subsequent wait or stop is interrupted. + engine.SaveState(ctx, id, config) + if config.Lifecycle == nil || config.Lifecycle.Started == nil { return id, nil, nil } From 775a0c9eeab05dc3cef07414dfcccae0a2e61772 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 17:07:36 +0200 Subject: [PATCH 16/21] direct: save state before wait in model_serving_endpoint DoUpdate DoUpdate applies up to four sequential mutating calls (tags, AI gateway, config, notifications) and then waits up to 35 minutes for the endpoint to finish updating. If that wait was interrupted, none of the applied changes were persisted to state, forcing them to be re-applied on the next deploy. Save the new config right after the mutating calls and before the wait, mirroring DoCreate which already saves before waitForEndpointReady. The id (endpoint name) matches the one DoCreate persists, so there is no id mismatch. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/model_serving_endpoint.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index 091effa0631..85cd8c2a203 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -288,7 +288,7 @@ func (r *ResourceModelServingEndpoint) updateTags(ctx context.Context, id string return nil } -func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, engine *Engine, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { var err error // Terraform makes these API calls sequentially. We do the same here. @@ -322,6 +322,10 @@ func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, _ *Engine, } } + // All mutating calls have been applied; save the new config before the wait so + // an interrupted wait does not require re-applying them on the next deploy. + engine.SaveState(ctx, id, config) + return r.waitForEndpointReady(ctx, config.Name) } From 694e04877bf5b2810c93079dcef537fd5524f8b9 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 17:18:59 +0200 Subject: [PATCH 17/21] direct: revert postgres SaveState (waiter.Name() is the LRO operation, not resource id) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous attempt to save state before the async wait in postgres DoCreate used engine.SaveState(ctx, waiter.Name(), config). waiter.Name() returns the LRO operation name (e.g. .../branches/foo/operations/UUID), not the resource name (…/branches/foo) that DoCreate ultimately returns. Using the operation name as the id creates a dead WAL entry and does not help with orphan recovery. Replace with a TODO comment explaining the two options for a proper fix: 1. Derive the resource name from request inputs (Parent + resource-type + Id). 2. Use waiter.Metadata() if it surfaces the resource name before Wait completes. No panic in production: the framework saves state via db.SaveState (not through the Engine) after DoCreate returns, so the mismatch between operation-name and real-id never hits the Engine's id-mismatch check. The test panic was a test artifact where the same Engine was reused across Create→Update. Co-authored-by: Denis Bilenko --- bundle/direct/dresources/postgres_branch.go | 8 ++++++-- bundle/direct/dresources/postgres_catalog.go | 8 ++++++-- bundle/direct/dresources/postgres_endpoint.go | 8 ++++++-- bundle/direct/dresources/postgres_project.go | 8 ++++++-- bundle/direct/dresources/postgres_synced_table.go | 8 ++++++-- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/bundle/direct/dresources/postgres_branch.go b/bundle/direct/dresources/postgres_branch.go index 784daddb7fa..41ae5c4a66a 100644 --- a/bundle/direct/dresources/postgres_branch.go +++ b/bundle/direct/dresources/postgres_branch.go @@ -102,7 +102,7 @@ func (r *ResourcePostgresBranch) DoRead(ctx context.Context, id string) (*Postgr return makePostgresBranchRemote(branch), nil } -func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, engine *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { waiter, err := r.client.Postgres.CreateBranch(ctx, postgres.CreateBranchRequest{ BranchId: config.BranchId, Parent: config.Parent, @@ -124,7 +124,11 @@ func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, engine *Engine, c if err != nil { return "", nil, err } - engine.SaveState(ctx, waiter.Name(), config) + // TODO: save state before the wait to prevent orphaning on interruption. + // waiter.Name() returns the LRO operation name (e.g. .../operations/UUID), + // not the real resource name. We need the resource name to save a valid state + // entry; options: (1) derive it from input (Parent + resource-type + Id), + // (2) call waiter.Metadata() if it exposes the resource name early. // Wait for the branch to be ready (long-running operation) result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_catalog.go b/bundle/direct/dresources/postgres_catalog.go index b4574307118..80a6161b080 100644 --- a/bundle/direct/dresources/postgres_catalog.go +++ b/bundle/direct/dresources/postgres_catalog.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresCatalog) DoRead(ctx context.Context, id string) (*Postg return makePostgresCatalogRemote(catalog), nil } -func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, engine *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { +func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { waiter, err := r.client.Postgres.CreateCatalog(ctx, postgres.CreateCatalogRequest{ CatalogId: config.CatalogId, Catalog: postgres.Catalog{ @@ -109,7 +109,11 @@ func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, engine *Engine, if err != nil { return "", nil, err } - engine.SaveState(ctx, waiter.Name(), config) + // TODO: save state before the wait to prevent orphaning on interruption. + // waiter.Name() returns the LRO operation name (e.g. .../operations/UUID), + // not the real resource name. We need the resource name to save a valid state + // entry; options: (1) derive it from input (Parent + resource-type + Id), + // (2) call waiter.Metadata() if it exposes the resource name early. result, err := waiter.Wait(ctx) if err != nil { diff --git a/bundle/direct/dresources/postgres_endpoint.go b/bundle/direct/dresources/postgres_endpoint.go index 62d10cead17..04a1bb201ff 100644 --- a/bundle/direct/dresources/postgres_endpoint.go +++ b/bundle/direct/dresources/postgres_endpoint.go @@ -141,7 +141,7 @@ func (r *ResourcePostgresEndpoint) waitForReconciliation(ctx context.Context, na } } -func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, engine *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { waiter, err := r.client.Postgres.CreateEndpoint(ctx, postgres.CreateEndpointRequest{ EndpointId: config.EndpointId, Parent: config.Parent, @@ -163,7 +163,11 @@ func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, engine *Engine, if err != nil { return "", nil, err } - engine.SaveState(ctx, waiter.Name(), config) + // TODO: save state before the wait to prevent orphaning on interruption. + // waiter.Name() returns the LRO operation name (e.g. .../operations/UUID), + // not the real resource name. We need the resource name to save a valid state + // entry; options: (1) derive it from input (Parent + resource-type + Id), + // (2) call waiter.Metadata() if it exposes the resource name early. // Wait for the operation to complete result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_project.go b/bundle/direct/dresources/postgres_project.go index fcc9e24f9c7..f16dacc47d0 100644 --- a/bundle/direct/dresources/postgres_project.go +++ b/bundle/direct/dresources/postgres_project.go @@ -106,7 +106,7 @@ func (r *ResourcePostgresProject) DoRead(ctx context.Context, id string) (*Postg return makePostgresProjectRemote(project), nil } -func (r *ResourcePostgresProject) DoCreate(ctx context.Context, engine *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { waiter, err := r.client.Postgres.CreateProject(ctx, postgres.CreateProjectRequest{ ProjectId: config.ProjectId, Project: postgres.Project{ @@ -127,7 +127,11 @@ func (r *ResourcePostgresProject) DoCreate(ctx context.Context, engine *Engine, if err != nil { return "", nil, err } - engine.SaveState(ctx, waiter.Name(), config) + // TODO: save state before the wait to prevent orphaning on interruption. + // waiter.Name() returns the LRO operation name (e.g. .../operations/UUID), + // not the real resource name. We need the resource name to save a valid state + // entry; options: (1) derive it from input (Parent + resource-type + Id), + // (2) call waiter.Metadata() if it exposes the resource name early. // Wait for the project to be ready (long-running operation) result, err := waiter.Wait(ctx) diff --git a/bundle/direct/dresources/postgres_synced_table.go b/bundle/direct/dresources/postgres_synced_table.go index d90f8fc89b6..7ceac7a42f9 100644 --- a/bundle/direct/dresources/postgres_synced_table.go +++ b/bundle/direct/dresources/postgres_synced_table.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresSyncedTable) DoRead(ctx context.Context, id string) (*P return makePostgresSyncedTableRemote(syncedTable), nil } -func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, engine *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { +func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { waiter, err := r.client.Postgres.CreateSyncedTable(ctx, postgres.CreateSyncedTableRequest{ SyncedTableId: config.SyncedTableId, SyncedTable: postgres.SyncedTable{ @@ -108,7 +108,11 @@ func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, engine *Engi if err != nil { return "", nil, err } - engine.SaveState(ctx, waiter.Name(), config) + // TODO: save state before the wait to prevent orphaning on interruption. + // waiter.Name() returns the LRO operation name (e.g. .../operations/UUID), + // not the real resource name. We need the resource name to save a valid state + // entry; options: (1) derive it from input (Parent + resource-type + Id), + // (2) call waiter.Metadata() if it exposes the resource name early. result, err := waiter.Wait(ctx) if err != nil { From 9cd0b641431feeb89d8c68d6e7327acc9c74406a Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 17:31:49 +0200 Subject: [PATCH 18/21] direct: route final Create/Update state saves through Engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously apply.go called db.SaveState directly after DoCreate/DoUpdate returned, bypassing the engine. This meant a DoCreate that called engine.SaveState with a wrong id (e.g. an LRO operation name instead of the real resource name) would go undetected — the final save would silently use the correct id. Route both final saves through the engine instead: - If DoCreate/DoUpdate called engine.SaveState with a wrong id, the engine's id-mismatch check panics, catching the bug immediately in tests. - If DoCreate/DoUpdate already saved identical state (e.g. the resource saved before a long wait), the engine's dedup skips the redundant write. - State-save I/O errors are logged internally by the engine and no longer abort the deployment (the resource was already created/updated successfully; aborting would confuse the user). UpdateWithID is left unchanged: DoUpdateWithID takes no Engine parameter. Co-authored-by: Denis Bilenko --- bundle/direct/apply.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index dd631d5e14a..a18e16bd4ab 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -79,10 +79,11 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) - if err != nil { - return fmt.Errorf("saving state after creating id=%s: %w", newID, err) - } + // Route the final save through the engine so that the id-mismatch check catches + // any DoCreate implementation that called engine.SaveState with a wrong id (e.g. + // an LRO operation name instead of the real resource name). The engine also + // deduplicates: if DoCreate already saved identical state, this is a no-op. + engine.SaveState(ctx, newID, newState) return nil } @@ -141,10 +142,9 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) - if err != nil { - return fmt.Errorf("saving state id=%s: %w", id, err) - } + // Route through the engine so the id-mismatch check fires if DoUpdate saved + // under a wrong id, and to deduplicate writes when DoUpdate already saved. + engine.SaveState(ctx, id, newState) return nil } From 339e386514d3bfa4d15f66ce06d99907a3a4d746 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 17:38:50 +0200 Subject: [PATCH 19/21] direct: rename Engine -> StateSaver The type does exactly one thing: save state. StateSaver names that directly. NewNopEngine -> NewNopStateSaver, engine.go -> state_saver.go. Co-authored-by: Denis Bilenko --- bundle/direct/apply.go | 4 ++-- bundle/direct/dresources/adapter.go | 12 ++++++------ bundle/direct/dresources/alert.go | 4 ++-- bundle/direct/dresources/all_test.go | 2 +- bundle/direct/dresources/app.go | 4 ++-- bundle/direct/dresources/app_test.go | 4 ++-- bundle/direct/dresources/catalog.go | 4 ++-- bundle/direct/dresources/cluster.go | 4 ++-- bundle/direct/dresources/dashboard.go | 4 ++-- bundle/direct/dresources/database_catalog.go | 4 ++-- bundle/direct/dresources/database_instance.go | 4 ++-- bundle/direct/dresources/experiment.go | 4 ++-- bundle/direct/dresources/external_location.go | 4 ++-- bundle/direct/dresources/grants.go | 4 ++-- bundle/direct/dresources/job.go | 4 ++-- bundle/direct/dresources/model.go | 4 ++-- .../dresources/model_serving_endpoint.go | 4 ++-- bundle/direct/dresources/permissions.go | 4 ++-- bundle/direct/dresources/pipeline.go | 4 ++-- bundle/direct/dresources/postgres_branch.go | 4 ++-- bundle/direct/dresources/postgres_catalog.go | 2 +- bundle/direct/dresources/postgres_endpoint.go | 4 ++-- bundle/direct/dresources/postgres_project.go | 4 ++-- .../direct/dresources/postgres_synced_table.go | 2 +- bundle/direct/dresources/quality_monitor.go | 4 ++-- bundle/direct/dresources/registered_model.go | 4 ++-- bundle/direct/dresources/schema.go | 4 ++-- bundle/direct/dresources/schema_test.go | 2 +- bundle/direct/dresources/secret_scope.go | 2 +- bundle/direct/dresources/secret_scope_acls.go | 4 ++-- bundle/direct/dresources/sql_warehouse.go | 4 ++-- .../dresources/{engine.go => state_saver.go} | 18 +++++++++--------- .../direct/dresources/synced_database_table.go | 4 ++-- .../dresources/vector_search_endpoint.go | 4 ++-- .../direct/dresources/vector_search_index.go | 2 +- bundle/direct/dresources/volume.go | 4 ++-- 36 files changed, 77 insertions(+), 77 deletions(-) rename bundle/direct/dresources/{engine.go => state_saver.go} (71%) diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index a18e16bd4ab..f142fce5bef 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -51,7 +51,7 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { - engine := dresources.NewEngine(d.ResourceKey, d.Adapter.StateType(), func(id string, x any) error { + engine := dresources.NewStateSaver(d.ResourceKey, d.Adapter.StateType(), func(id string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) @@ -127,7 +127,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("internal error: DoUpdate not implemented for resource %s", d.ResourceKey) } - engine := dresources.NewEngine(d.ResourceKey, d.Adapter.StateType(), func(_ string, x any) error { + engine := dresources.NewStateSaver(d.ResourceKey, d.Adapter.StateType(), func(_ string, x any) error { return db.SaveState(d.ResourceKey, id, x, d.DependsOn) }) remoteState, err := retryOnTransient(ctx, func() (any, error) { diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index cfcd675f9b2..c8a21868d3f 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -54,13 +54,13 @@ type IResource interface { // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. // Call engine.SaveState(ctx, id, state) to persist intermediate state before long-running waits. - // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) - DoCreate(ctx context.Context, engine *Engine, newState any) (id string, remoteState any, e error) + // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, _ *StateSaver, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) + DoCreate(ctx context.Context, engine *StateSaver, newState any) (id string, remoteState any, e error) // [Optional] DoUpdate updates the resource. ID must not change as a result of this operation. Returns optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. - // Example: func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *Engine, id string, newState *catalog.CreateSchema, entry *PlanEntry) (*catalog.SchemaInfo, error) - DoUpdate(ctx context.Context, engine *Engine, id string, newState any, entry *PlanEntry) (remoteState any, e error) + // Example: func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *StateSaver, id string, newState *catalog.CreateSchema, entry *PlanEntry) (*catalog.SchemaInfo, error) + DoUpdate(ctx context.Context, engine *StateSaver, id string, newState any, entry *PlanEntry) (remoteState any, e error) // [Optional] DoUpdateWithID performs an update that may result in resource having a new ID. Returns new id and optionally remote state. DoUpdateWithID(ctx context.Context, id string, newState any) (newID string, remoteState any, e error) @@ -395,7 +395,7 @@ func normalizeNilPointer(v any) any { return v } -func (a *Adapter) DoCreate(ctx context.Context, engine *Engine, newState any) (string, any, error) { +func (a *Adapter) DoCreate(ctx context.Context, engine *StateSaver, newState any) (string, any, error) { outs, err := a.doCreate.Call(ctx, engine, newState) if err != nil { return "", nil, err @@ -413,7 +413,7 @@ func (a *Adapter) HasDoUpdate() bool { // DoUpdate updates the resource with the plan entry computed during plan. // Returns remote state if available, otherwise nil. -func (a *Adapter) DoUpdate(ctx context.Context, engine *Engine, id string, newState any, entry *PlanEntry) (any, error) { +func (a *Adapter) DoUpdate(ctx context.Context, engine *StateSaver, id string, newState any, entry *PlanEntry) (any, error) { if a.doUpdate == nil { return nil, errors.New("internal error: DoUpdate not found") } diff --git a/bundle/direct/dresources/alert.go b/bundle/direct/dresources/alert.go index af71e378d41..d648134288f 100644 --- a/bundle/direct/dresources/alert.go +++ b/bundle/direct/dresources/alert.go @@ -37,7 +37,7 @@ func (r *ResourceAlert) DoRead(ctx context.Context, id string) (*sql.AlertV2, er } // DoCreate creates the alert and returns its id. -func (r *ResourceAlert) DoCreate(ctx context.Context, _ *Engine, config *sql.AlertV2) (string, *sql.AlertV2, error) { +func (r *ResourceAlert) DoCreate(ctx context.Context, _ *StateSaver, config *sql.AlertV2) (string, *sql.AlertV2, error) { request := sql.CreateAlertV2Request{ Alert: *config, } @@ -49,7 +49,7 @@ func (r *ResourceAlert) DoCreate(ctx context.Context, _ *Engine, config *sql.Ale } // DoUpdate updates the alert in place. -func (r *ResourceAlert) DoUpdate(ctx context.Context, _ *Engine, id string, config *sql.AlertV2, _ *PlanEntry) (*sql.AlertV2, error) { +func (r *ResourceAlert) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *sql.AlertV2, _ *PlanEntry) (*sql.AlertV2, error) { request := sql.UpdateAlertV2Request{ Id: id, Alert: *config, diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index fb33d12cb9d..767841bd01a 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -827,7 +827,7 @@ func testCRUD(t *testing.T, group string, adapter *Adapter, client *databricks.W require.Error(t, err) // TODO: if errors.Is(err, databricks.ErrResourceDoesNotExist) {... } - nopEngine := NewNopEngine(adapter.StateType()) + nopEngine := NewNopStateSaver(adapter.StateType()) createdID, remoteStateFromCreate, err := adapter.DoCreate(ctx, nopEngine, newState) require.NoError(t, err, "DoCreate failed state=%v", newState) require.NotEmpty(t, createdID, "ID returned from DoCreate was empty") diff --git a/bundle/direct/dresources/app.go b/bundle/direct/dresources/app.go index f82de0e2363..a9546bcdccf 100644 --- a/bundle/direct/dresources/app.go +++ b/bundle/direct/dresources/app.go @@ -116,7 +116,7 @@ func (r *ResourceApp) DoRead(ctx context.Context, id string) (*AppRemote, error) return remote, nil } -func (r *ResourceApp) DoCreate(ctx context.Context, engine *Engine, config *AppState) (string, *AppRemote, error) { +func (r *ResourceApp) DoCreate(ctx context.Context, engine *StateSaver, config *AppState) (string, *AppRemote, error) { // Start app compute only when lifecycle.started=true is explicit. // For nil (omitted) or false, use no_compute=true (do not start compute). noCompute := config.Lifecycle == nil || config.Lifecycle.Started == nil || !*config.Lifecycle.Started @@ -185,7 +185,7 @@ var UpdateMaskFields = []string{ var updateMask = strings.Join(UpdateMaskFields, ",") -func (r *ResourceApp) DoUpdate(ctx context.Context, _ *Engine, id string, config *AppState, entry *PlanEntry) (*AppRemote, error) { +func (r *ResourceApp) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *AppState, entry *PlanEntry) (*AppRemote, error) { // Deploy-only fields (source_code_path, config, // git_source, lifecycle) are not part of apps.App and thus excluded from the request body. if hasAppChanges(entry) { diff --git a/bundle/direct/dresources/app_test.go b/bundle/direct/dresources/app_test.go index 19f2a35a332..f65aa1f82bd 100644 --- a/bundle/direct/dresources/app_test.go +++ b/bundle/direct/dresources/app_test.go @@ -36,7 +36,7 @@ func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) { require.NoError(t, err) r := (&ResourceApp{}).New(client) - name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) + name, _, err := r.DoCreate(ctx, NewNopStateSaver(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) @@ -73,7 +73,7 @@ func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) { r := (&ResourceApp{}).New(client) ctx := t.Context() - name, _, err := r.DoCreate(ctx, NewNopEngine(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) + name, _, err := r.DoCreate(ctx, NewNopStateSaver(reflect.TypeFor[*AppState]()), &AppState{App: apps.App{Name: "test-app"}}) require.NoError(t, err) assert.Equal(t, "test-app", name) diff --git a/bundle/direct/dresources/catalog.go b/bundle/direct/dresources/catalog.go index d23cda3538f..dc9ca6c8f02 100644 --- a/bundle/direct/dresources/catalog.go +++ b/bundle/direct/dresources/catalog.go @@ -40,7 +40,7 @@ func (r *ResourceCatalog) DoRead(ctx context.Context, id string) (*catalog.Catal return r.client.Catalogs.GetByName(ctx, id) } -func (r *ResourceCatalog) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateCatalog) (string, *catalog.CatalogInfo, error) { +func (r *ResourceCatalog) DoCreate(ctx context.Context, _ *StateSaver, config *catalog.CreateCatalog) (string, *catalog.CatalogInfo, error) { response, err := r.client.Catalogs.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -49,7 +49,7 @@ func (r *ResourceCatalog) DoCreate(ctx context.Context, _ *Engine, config *catal } // DoUpdate updates the catalog in place and returns remote state. -func (r *ResourceCatalog) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateCatalog, _ *PlanEntry) (*catalog.CatalogInfo, error) { +func (r *ResourceCatalog) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *catalog.CreateCatalog, _ *PlanEntry) (*catalog.CatalogInfo, error) { updateRequest := catalog.UpdateCatalog{ Comment: config.Comment, EnablePredictiveOptimization: "", // Not supported by DABs diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index e62a8e99775..4bc0675c8eb 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -147,7 +147,7 @@ func (r *ResourceCluster) DoRead(ctx context.Context, id string) (*ClusterRemote return remote, nil } -func (r *ResourceCluster) DoCreate(ctx context.Context, engine *Engine, config *ClusterState) (string, *ClusterRemote, error) { +func (r *ResourceCluster) DoCreate(ctx context.Context, engine *StateSaver, config *ClusterState) (string, *ClusterRemote, error) { wait, err := r.client.Clusters.Create(ctx, makeCreateCluster(&config.ClusterSpec)) if err != nil { return "", nil, err @@ -185,7 +185,7 @@ func hasClusterChanges(entry *PlanEntry) bool { return entry.Changes.HasChangeExcept("lifecycle", "lifecycle.started") } -func (r *ResourceCluster) DoUpdate(ctx context.Context, _ *Engine, id string, config *ClusterState, entry *PlanEntry) (*ClusterRemote, error) { +func (r *ResourceCluster) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *ClusterState, entry *PlanEntry) (*ClusterRemote, error) { if hasClusterChanges(entry) { // Same retry as in TF provider logic // https://github.com/databricks/terraform-provider-databricks/blob/3eecd0f90cf99d7777e79a3d03c41f9b2aafb004/clusters/resource_cluster.go#L624 diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 6410db50723..94adef397be 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -263,7 +263,7 @@ func responseToState(createOrUpdateResp *dashboards.Dashboard, publishResp *dash } } -func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config *DashboardState) (string, *DashboardState, error) { +func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *StateSaver, config *DashboardState) (string, *DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return "", nil, err @@ -319,7 +319,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *Engine, config return createResp.DashboardId, responseToState(createResp, publishResp, dashboard.SerializedDashboard, config.Published), nil } -func (r *ResourceDashboard) DoUpdate(ctx context.Context, engine *Engine, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { +func (r *ResourceDashboard) DoUpdate(ctx context.Context, engine *StateSaver, id string, config *DashboardState, _ *PlanEntry) (*DashboardState, error) { dashboard, err := prepareDashboardRequest(config) if err != nil { return nil, err diff --git a/bundle/direct/dresources/database_catalog.go b/bundle/direct/dresources/database_catalog.go index f5e18e8da27..8078beaa34b 100644 --- a/bundle/direct/dresources/database_catalog.go +++ b/bundle/direct/dresources/database_catalog.go @@ -24,7 +24,7 @@ func (r *ResourceDatabaseCatalog) DoRead(ctx context.Context, id string) (*datab return r.client.Database.GetDatabaseCatalogByName(ctx, id) } -func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, _ *Engine, config *database.DatabaseCatalog) (string, *database.DatabaseCatalog, error) { +func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, _ *StateSaver, config *database.DatabaseCatalog) (string, *database.DatabaseCatalog, error) { result, err := r.client.Database.CreateDatabaseCatalog(ctx, database.CreateDatabaseCatalogRequest{ Catalog: *config, }) @@ -34,7 +34,7 @@ func (r *ResourceDatabaseCatalog) DoCreate(ctx context.Context, _ *Engine, confi return result.Name, nil, nil } -func (r *ResourceDatabaseCatalog) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.DatabaseCatalog, _ *PlanEntry) (*database.DatabaseCatalog, error) { +func (r *ResourceDatabaseCatalog) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *database.DatabaseCatalog, _ *PlanEntry) (*database.DatabaseCatalog, error) { request := database.UpdateDatabaseCatalogRequest{ DatabaseCatalog: *config, Name: id, diff --git a/bundle/direct/dresources/database_instance.go b/bundle/direct/dresources/database_instance.go index c66a0bd092a..d0c46999681 100644 --- a/bundle/direct/dresources/database_instance.go +++ b/bundle/direct/dresources/database_instance.go @@ -25,7 +25,7 @@ func (d *ResourceDatabaseInstance) DoRead(ctx context.Context, id string) (*data return d.client.Database.GetDatabaseInstanceByName(ctx, id) } -func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, config *database.DatabaseInstance) (string, *database.DatabaseInstance, error) { +func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *StateSaver, config *database.DatabaseInstance) (string, *database.DatabaseInstance, error) { waiter, err := d.client.Database.CreateDatabaseInstance(ctx, database.CreateDatabaseInstanceRequest{ DatabaseInstance: *config, }) @@ -50,7 +50,7 @@ func (d *ResourceDatabaseInstance) DoCreate(ctx context.Context, engine *Engine, return id, nil, err } -func (d *ResourceDatabaseInstance) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.DatabaseInstance, _ *PlanEntry) (*database.DatabaseInstance, error) { +func (d *ResourceDatabaseInstance) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *database.DatabaseInstance, _ *PlanEntry) (*database.DatabaseInstance, error) { request := database.UpdateDatabaseInstanceRequest{ DatabaseInstance: *config, Name: config.Name, diff --git a/bundle/direct/dresources/experiment.go b/bundle/direct/dresources/experiment.go index 71f807d4987..fdc7cb528db 100644 --- a/bundle/direct/dresources/experiment.go +++ b/bundle/direct/dresources/experiment.go @@ -50,7 +50,7 @@ func (r *ResourceExperiment) DoRead(ctx context.Context, id string) (*ml.Experim return result.Experiment, nil } -func (r *ResourceExperiment) DoCreate(ctx context.Context, _ *Engine, config *ml.CreateExperiment) (string, *ml.Experiment, error) { +func (r *ResourceExperiment) DoCreate(ctx context.Context, _ *StateSaver, config *ml.CreateExperiment) (string, *ml.Experiment, error) { result, err := r.client.Experiments.CreateExperiment(ctx, *config) if err != nil { return "", nil, err @@ -58,7 +58,7 @@ func (r *ResourceExperiment) DoCreate(ctx context.Context, _ *Engine, config *ml return result.ExperimentId, nil, nil } -func (r *ResourceExperiment) DoUpdate(ctx context.Context, _ *Engine, id string, config *ml.CreateExperiment, _ *PlanEntry) (*ml.Experiment, error) { +func (r *ResourceExperiment) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *ml.CreateExperiment, _ *PlanEntry) (*ml.Experiment, error) { updateReq := ml.UpdateExperiment{ ExperimentId: id, NewName: config.Name, diff --git a/bundle/direct/dresources/external_location.go b/bundle/direct/dresources/external_location.go index be29bed5830..dfe14983cde 100644 --- a/bundle/direct/dresources/external_location.go +++ b/bundle/direct/dresources/external_location.go @@ -44,7 +44,7 @@ func (r *ResourceExternalLocation) DoRead(ctx context.Context, id string) (*cata return r.client.ExternalLocations.GetByName(ctx, id) } -func (r *ResourceExternalLocation) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateExternalLocation) (string, *catalog.ExternalLocationInfo, error) { +func (r *ResourceExternalLocation) DoCreate(ctx context.Context, _ *StateSaver, config *catalog.CreateExternalLocation) (string, *catalog.ExternalLocationInfo, error) { response, err := r.client.ExternalLocations.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -53,7 +53,7 @@ func (r *ResourceExternalLocation) DoCreate(ctx context.Context, _ *Engine, conf } // DoUpdate updates the external location in place and returns remote state. -func (r *ResourceExternalLocation) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateExternalLocation, _ *PlanEntry) (*catalog.ExternalLocationInfo, error) { +func (r *ResourceExternalLocation) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *catalog.CreateExternalLocation, _ *PlanEntry) (*catalog.ExternalLocationInfo, error) { updateRequest := catalog.UpdateExternalLocation{ Comment: config.Comment, CredentialName: config.CredentialName, diff --git a/bundle/direct/dresources/grants.go b/bundle/direct/dresources/grants.go index f03df836698..b90f7252d18 100644 --- a/bundle/direct/dresources/grants.go +++ b/bundle/direct/dresources/grants.go @@ -107,7 +107,7 @@ func (r *ResourceGrants) DoRead(ctx context.Context, id string) (*GrantsState, e }, nil } -func (r *ResourceGrants) DoCreate(ctx context.Context, engine *Engine, state *GrantsState) (string, *GrantsState, error) { +func (r *ResourceGrants) DoCreate(ctx context.Context, engine *StateSaver, state *GrantsState) (string, *GrantsState, error) { _, err := r.DoUpdate(ctx, engine, "", state, nil) if err != nil { // Grants Update is idempotent (additive PATCH), so retrying on transient errors is safe. @@ -117,7 +117,7 @@ func (r *ResourceGrants) DoCreate(ctx context.Context, engine *Engine, state *Gr return state.SecurableType + "/" + state.FullName, nil, nil } -func (r *ResourceGrants) DoUpdate(ctx context.Context, _ *Engine, _ string, state *GrantsState, entry *PlanEntry) (*GrantsState, error) { +func (r *ResourceGrants) DoUpdate(ctx context.Context, _ *StateSaver, _ string, state *GrantsState, entry *PlanEntry) (*GrantsState, error) { if state.FullName == "" { return nil, errors.New("internal error: grants full_name must be resolved before deployment") } diff --git a/bundle/direct/dresources/job.go b/bundle/direct/dresources/job.go index efc013904b2..a68c3bdf58b 100644 --- a/bundle/direct/dresources/job.go +++ b/bundle/direct/dresources/job.go @@ -122,7 +122,7 @@ func makeJobRemote(job *jobs.Job) *JobRemote { } } -func (r *ResourceJob) DoCreate(ctx context.Context, _ *Engine, config *jobs.JobSettings) (string, *JobRemote, error) { +func (r *ResourceJob) DoCreate(ctx context.Context, _ *StateSaver, config *jobs.JobSettings) (string, *JobRemote, error) { request, err := makeCreateJob(*config) if err != nil { return "", nil, err @@ -134,7 +134,7 @@ func (r *ResourceJob) DoCreate(ctx context.Context, _ *Engine, config *jobs.JobS return strconv.FormatInt(response.JobId, 10), nil, nil } -func (r *ResourceJob) DoUpdate(ctx context.Context, _ *Engine, id string, config *jobs.JobSettings, _ *PlanEntry) (*JobRemote, error) { +func (r *ResourceJob) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *jobs.JobSettings, _ *PlanEntry) (*JobRemote, error) { request, err := makeResetJob(*config, id) if err != nil { return nil, err diff --git a/bundle/direct/dresources/model.go b/bundle/direct/dresources/model.go index 8b723f30603..1934ff22ecb 100644 --- a/bundle/direct/dresources/model.go +++ b/bundle/direct/dresources/model.go @@ -52,7 +52,7 @@ func (r *ResourceMlflowModel) DoRead(ctx context.Context, id string) (*MlflowMod }, nil } -func (r *ResourceMlflowModel) DoCreate(ctx context.Context, _ *Engine, config *ml.CreateModelRequest) (string, *MlflowModelRemote, error) { +func (r *ResourceMlflowModel) DoCreate(ctx context.Context, _ *StateSaver, config *ml.CreateModelRequest) (string, *MlflowModelRemote, error) { response, err := r.client.ModelRegistry.CreateModel(ctx, *config) if err != nil { return "", nil, err @@ -62,7 +62,7 @@ func (r *ResourceMlflowModel) DoCreate(ctx context.Context, _ *Engine, config *m return response.RegisteredModel.Name, nil, nil } -func (r *ResourceMlflowModel) DoUpdate(ctx context.Context, _ *Engine, id string, config *ml.CreateModelRequest, entry *PlanEntry) (*MlflowModelRemote, error) { +func (r *ResourceMlflowModel) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *ml.CreateModelRequest, entry *PlanEntry) (*MlflowModelRemote, error) { updateRequest := ml.UpdateModelRequest{ Name: id, Description: config.Description, diff --git a/bundle/direct/dresources/model_serving_endpoint.go b/bundle/direct/dresources/model_serving_endpoint.go index 85cd8c2a203..cea4a5a9817 100644 --- a/bundle/direct/dresources/model_serving_endpoint.go +++ b/bundle/direct/dresources/model_serving_endpoint.go @@ -128,7 +128,7 @@ func (r *ResourceModelServingEndpoint) DoRead(ctx context.Context, id string) (* }, nil } -func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *Engine, config *serving.CreateServingEndpoint) (string, *ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoCreate(ctx context.Context, engine *StateSaver, config *serving.CreateServingEndpoint) (string, *ModelServingEndpointRemote, error) { waiter, err := r.client.ServingEndpoints.Create(ctx, *config) if err != nil { return "", nil, err @@ -288,7 +288,7 @@ func (r *ResourceModelServingEndpoint) updateTags(ctx context.Context, id string return nil } -func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, engine *Engine, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { +func (r *ResourceModelServingEndpoint) DoUpdate(ctx context.Context, engine *StateSaver, id string, config *serving.CreateServingEndpoint, entry *PlanEntry) (*ModelServingEndpointRemote, error) { var err error // Terraform makes these API calls sequentially. We do the same here. diff --git a/bundle/direct/dresources/permissions.go b/bundle/direct/dresources/permissions.go index cca6f3a445a..0c943fa8f9c 100644 --- a/bundle/direct/dresources/permissions.go +++ b/bundle/direct/dresources/permissions.go @@ -215,7 +215,7 @@ func (r *ResourcePermissions) DoRead(ctx context.Context, id string) (*Permissio } // DoCreate calls https://docs.databricks.com/api/workspace/jobs/setjobpermissions. -func (r *ResourcePermissions) DoCreate(ctx context.Context, engine *Engine, newState *PermissionsState) (string, *PermissionsState, error) { +func (r *ResourcePermissions) DoCreate(ctx context.Context, engine *StateSaver, newState *PermissionsState) (string, *PermissionsState, error) { // should we remember the default here? _, err := r.DoUpdate(ctx, engine, newState.ObjectID, newState, nil) if err != nil { @@ -227,7 +227,7 @@ func (r *ResourcePermissions) DoCreate(ctx context.Context, engine *Engine, newS } // DoUpdate calls https://docs.databricks.com/api/workspace/jobs/setjobpermissions. -func (r *ResourcePermissions) DoUpdate(ctx context.Context, _ *Engine, _ string, newState *PermissionsState, _ *PlanEntry) (*PermissionsState, error) { +func (r *ResourcePermissions) DoUpdate(ctx context.Context, _ *StateSaver, _ string, newState *PermissionsState, _ *PlanEntry) (*PermissionsState, error) { extractedType, extractedID, err := parsePermissionsID(newState.ObjectID) if err != nil { return nil, err diff --git a/bundle/direct/dresources/pipeline.go b/bundle/direct/dresources/pipeline.go index e2e6aa12807..25e62b129f0 100644 --- a/bundle/direct/dresources/pipeline.go +++ b/bundle/direct/dresources/pipeline.go @@ -123,7 +123,7 @@ func makePipelineRemote(p *pipelines.GetPipelineResponse) *PipelineRemote { } } -func (r *ResourcePipeline) DoCreate(ctx context.Context, _ *Engine, config *pipelines.CreatePipeline) (string, *PipelineRemote, error) { +func (r *ResourcePipeline) DoCreate(ctx context.Context, _ *StateSaver, config *pipelines.CreatePipeline) (string, *PipelineRemote, error) { response, err := r.client.Pipelines.Create(ctx, *config) if err != nil { return "", nil, err @@ -131,7 +131,7 @@ func (r *ResourcePipeline) DoCreate(ctx context.Context, _ *Engine, config *pipe return response.PipelineId, nil, nil } -func (r *ResourcePipeline) DoUpdate(ctx context.Context, _ *Engine, id string, config *pipelines.CreatePipeline, _ *PlanEntry) (*PipelineRemote, error) { +func (r *ResourcePipeline) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *pipelines.CreatePipeline, _ *PlanEntry) (*PipelineRemote, error) { request := pipelines.EditPipeline{ AllowDuplicateNames: config.AllowDuplicateNames, BudgetPolicyId: config.BudgetPolicyId, diff --git a/bundle/direct/dresources/postgres_branch.go b/bundle/direct/dresources/postgres_branch.go index 41ae5c4a66a..eb92ca7e67f 100644 --- a/bundle/direct/dresources/postgres_branch.go +++ b/bundle/direct/dresources/postgres_branch.go @@ -102,7 +102,7 @@ func (r *ResourcePostgresBranch) DoRead(ctx context.Context, id string) (*Postgr return makePostgresBranchRemote(branch), nil } -func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *StateSaver, config *PostgresBranchState) (string, *PostgresBranchRemote, error) { waiter, err := r.client.Postgres.CreateBranch(ctx, postgres.CreateBranchRequest{ BranchId: config.BranchId, Parent: config.Parent, @@ -140,7 +140,7 @@ func (r *ResourcePostgresBranch) DoCreate(ctx context.Context, _ *Engine, config return remote.Name, remote, nil } -func (r *ResourcePostgresBranch) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresBranchState, entry *PlanEntry) (*PostgresBranchRemote, error) { +func (r *ResourcePostgresBranch) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *PostgresBranchState, entry *PlanEntry) (*PostgresBranchRemote, error) { // Build update mask from fields that have action="update" in the changes map. // This excludes immutable fields and fields that haven't changed. // Prefix with "spec." because the API expects paths relative to the Branch object, diff --git a/bundle/direct/dresources/postgres_catalog.go b/bundle/direct/dresources/postgres_catalog.go index 80a6161b080..bb9c8108e9b 100644 --- a/bundle/direct/dresources/postgres_catalog.go +++ b/bundle/direct/dresources/postgres_catalog.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresCatalog) DoRead(ctx context.Context, id string) (*Postg return makePostgresCatalogRemote(catalog), nil } -func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *Engine, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { +func (r *ResourcePostgresCatalog) DoCreate(ctx context.Context, _ *StateSaver, config *PostgresCatalogState) (string, *PostgresCatalogRemote, error) { waiter, err := r.client.Postgres.CreateCatalog(ctx, postgres.CreateCatalogRequest{ CatalogId: config.CatalogId, Catalog: postgres.Catalog{ diff --git a/bundle/direct/dresources/postgres_endpoint.go b/bundle/direct/dresources/postgres_endpoint.go index 04a1bb201ff..952982cc8af 100644 --- a/bundle/direct/dresources/postgres_endpoint.go +++ b/bundle/direct/dresources/postgres_endpoint.go @@ -141,7 +141,7 @@ func (r *ResourcePostgresEndpoint) waitForReconciliation(ctx context.Context, na } } -func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *StateSaver, config *PostgresEndpointState) (string, *PostgresEndpointRemote, error) { waiter, err := r.client.Postgres.CreateEndpoint(ctx, postgres.CreateEndpointRequest{ EndpointId: config.EndpointId, Parent: config.Parent, @@ -184,7 +184,7 @@ func (r *ResourcePostgresEndpoint) DoCreate(ctx context.Context, _ *Engine, conf return remote.Name, remote, nil } -func (r *ResourcePostgresEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresEndpointState, entry *PlanEntry) (*PostgresEndpointRemote, error) { +func (r *ResourcePostgresEndpoint) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *PostgresEndpointState, entry *PlanEntry) (*PostgresEndpointRemote, error) { // Build update mask from fields that have action="update" in the changes map. // This excludes immutable fields and fields that haven't changed. // Prefix with "spec." because the API expects paths relative to the Endpoint object, diff --git a/bundle/direct/dresources/postgres_project.go b/bundle/direct/dresources/postgres_project.go index f16dacc47d0..4fc50866898 100644 --- a/bundle/direct/dresources/postgres_project.go +++ b/bundle/direct/dresources/postgres_project.go @@ -106,7 +106,7 @@ func (r *ResourcePostgresProject) DoRead(ctx context.Context, id string) (*Postg return makePostgresProjectRemote(project), nil } -func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *StateSaver, config *PostgresProjectState) (string, *PostgresProjectRemote, error) { waiter, err := r.client.Postgres.CreateProject(ctx, postgres.CreateProjectRequest{ ProjectId: config.ProjectId, Project: postgres.Project{ @@ -143,7 +143,7 @@ func (r *ResourcePostgresProject) DoCreate(ctx context.Context, _ *Engine, confi return remote.Name, remote, nil } -func (r *ResourcePostgresProject) DoUpdate(ctx context.Context, _ *Engine, id string, config *PostgresProjectState, entry *PlanEntry) (*PostgresProjectRemote, error) { +func (r *ResourcePostgresProject) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *PostgresProjectState, entry *PlanEntry) (*PostgresProjectRemote, error) { // Build the mask from the plan's change list and prefix with "spec." (the // API expects paths relative to Project). The API rejects mask entries // that aren't also populated in the request body, and a wildcard "*" diff --git a/bundle/direct/dresources/postgres_synced_table.go b/bundle/direct/dresources/postgres_synced_table.go index 7ceac7a42f9..46742d91386 100644 --- a/bundle/direct/dresources/postgres_synced_table.go +++ b/bundle/direct/dresources/postgres_synced_table.go @@ -91,7 +91,7 @@ func (r *ResourcePostgresSyncedTable) DoRead(ctx context.Context, id string) (*P return makePostgresSyncedTableRemote(syncedTable), nil } -func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *Engine, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { +func (r *ResourcePostgresSyncedTable) DoCreate(ctx context.Context, _ *StateSaver, config *PostgresSyncedTableState) (string, *PostgresSyncedTableRemote, error) { waiter, err := r.client.Postgres.CreateSyncedTable(ctx, postgres.CreateSyncedTableRequest{ SyncedTableId: config.SyncedTableId, SyncedTable: postgres.SyncedTable{ diff --git a/bundle/direct/dresources/quality_monitor.go b/bundle/direct/dresources/quality_monitor.go index 7a0473ab543..b1e82f92cc9 100644 --- a/bundle/direct/dresources/quality_monitor.go +++ b/bundle/direct/dresources/quality_monitor.go @@ -72,7 +72,7 @@ func (r *ResourceQualityMonitor) DoRead(ctx context.Context, id string) (*catalo }) } -func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, _ *Engine, config *QualityMonitorState) (string, *catalog.MonitorInfo, error) { +func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, _ *StateSaver, config *QualityMonitorState) (string, *catalog.MonitorInfo, error) { req := config.CreateMonitor req.TableName = config.TableName //nolint:staticcheck // Direct quality_monitor resource still uses legacy monitor endpoints; v1 data-quality migration is separate work. @@ -83,7 +83,7 @@ func (r *ResourceQualityMonitor) DoCreate(ctx context.Context, _ *Engine, config return response.TableName, response, nil } -func (r *ResourceQualityMonitor) DoUpdate(ctx context.Context, _ *Engine, id string, config *QualityMonitorState, _ *PlanEntry) (*catalog.MonitorInfo, error) { +func (r *ResourceQualityMonitor) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *QualityMonitorState, _ *PlanEntry) (*catalog.MonitorInfo, error) { updateRequest := catalog.UpdateMonitor{ TableName: id, BaselineTableName: config.BaselineTableName, diff --git a/bundle/direct/dresources/registered_model.go b/bundle/direct/dresources/registered_model.go index d1438a25bf5..b0dd2e0e8ad 100644 --- a/bundle/direct/dresources/registered_model.go +++ b/bundle/direct/dresources/registered_model.go @@ -55,7 +55,7 @@ func (r *ResourceRegisteredModel) DoRead(ctx context.Context, id string) (*catal }) } -func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateRegisteredModelRequest) (string, *catalog.RegisteredModelInfo, error) { +func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, _ *StateSaver, config *catalog.CreateRegisteredModelRequest) (string, *catalog.RegisteredModelInfo, error) { response, err := r.client.RegisteredModels.Create(ctx, *config) if err != nil { return "", nil, err @@ -64,7 +64,7 @@ func (r *ResourceRegisteredModel) DoCreate(ctx context.Context, _ *Engine, confi return response.FullName, response, nil } -func (r *ResourceRegisteredModel) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateRegisteredModelRequest, _ *PlanEntry) (*catalog.RegisteredModelInfo, error) { +func (r *ResourceRegisteredModel) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *catalog.CreateRegisteredModelRequest, _ *PlanEntry) (*catalog.RegisteredModelInfo, error) { updateRequest := catalog.UpdateRegisteredModelRequest{ FullName: id, Comment: config.Comment, diff --git a/bundle/direct/dresources/schema.go b/bundle/direct/dresources/schema.go index 5895961fab7..cfa1715f6c6 100644 --- a/bundle/direct/dresources/schema.go +++ b/bundle/direct/dresources/schema.go @@ -37,7 +37,7 @@ func (r *ResourceSchema) DoRead(ctx context.Context, id string) (*catalog.Schema return r.client.Schemas.GetByFullName(ctx, id) } -func (r *ResourceSchema) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateSchema) (string, *catalog.SchemaInfo, error) { +func (r *ResourceSchema) DoCreate(ctx context.Context, _ *StateSaver, config *catalog.CreateSchema) (string, *catalog.SchemaInfo, error) { response, err := r.client.Schemas.Create(ctx, *config) if err != nil || response == nil { return "", nil, err @@ -46,7 +46,7 @@ func (r *ResourceSchema) DoCreate(ctx context.Context, _ *Engine, config *catalo } // DoUpdate updates the schema in place and returns remote state. -func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateSchema, _ *PlanEntry) (*catalog.SchemaInfo, error) { +func (r *ResourceSchema) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *catalog.CreateSchema, _ *PlanEntry) (*catalog.SchemaInfo, error) { updateRequest := catalog.UpdateSchema{ Comment: config.Comment, EnablePredictiveOptimization: "", // Not supported by DABs diff --git a/bundle/direct/dresources/schema_test.go b/bundle/direct/dresources/schema_test.go index e3a66bf4cc6..fbb0e6c4dff 100644 --- a/bundle/direct/dresources/schema_test.go +++ b/bundle/direct/dresources/schema_test.go @@ -25,7 +25,7 @@ func TestResourceSchema_DoUpdate_WithUnsupportedForceSendFields(t *testing.T) { ForceSendFields: nil, } - nopEngine := NewNopEngine(reflect.TypeOf(config)) + nopEngine := NewNopStateSaver(reflect.TypeOf(config)) id, _, err := adapter.DoCreate(ctx, nopEngine, config) require.NoError(t, err) diff --git a/bundle/direct/dresources/secret_scope.go b/bundle/direct/dresources/secret_scope.go index a77fb1ebd3c..2a562e7898e 100644 --- a/bundle/direct/dresources/secret_scope.go +++ b/bundle/direct/dresources/secret_scope.go @@ -66,7 +66,7 @@ func (r *ResourceSecretScope) DoRead(ctx context.Context, id string) (*workspace return nil, fmt.Errorf("secret scope %q not found", id) } -func (r *ResourceSecretScope) DoCreate(ctx context.Context, _ *Engine, state *SecretScopeConfig) (string, *workspace.SecretScope, error) { +func (r *ResourceSecretScope) DoCreate(ctx context.Context, _ *StateSaver, state *SecretScopeConfig) (string, *workspace.SecretScope, error) { err := r.client.Secrets.CreateScope(ctx, state.CreateScope) if err != nil { return "", nil, err diff --git a/bundle/direct/dresources/secret_scope_acls.go b/bundle/direct/dresources/secret_scope_acls.go index 14cd05b8f91..1d7ed208893 100644 --- a/bundle/direct/dresources/secret_scope_acls.go +++ b/bundle/direct/dresources/secret_scope_acls.go @@ -92,7 +92,7 @@ func (r *ResourceSecretScopeAcls) RemapState(remote *SecretScopeAclsState) *Secr return remote } -func (r *ResourceSecretScopeAcls) DoCreate(ctx context.Context, _ *Engine, state *SecretScopeAclsState) (string, *SecretScopeAclsState, error) { +func (r *ResourceSecretScopeAcls) DoCreate(ctx context.Context, _ *StateSaver, state *SecretScopeAclsState) (string, *SecretScopeAclsState, error) { err := r.setACLs(ctx, state.ScopeName, state.Acls) if err != nil { return "", nil, err @@ -109,7 +109,7 @@ func (r *ResourceSecretScopeAcls) DoUpdateWithID(ctx context.Context, id string, return state.ScopeName, nil, nil } -func (r *ResourceSecretScopeAcls) DoUpdate(ctx context.Context, _ *Engine, id string, state *SecretScopeAclsState, _ *PlanEntry) (*SecretScopeAclsState, error) { +func (r *ResourceSecretScopeAcls) DoUpdate(ctx context.Context, _ *StateSaver, id string, state *SecretScopeAclsState, _ *PlanEntry) (*SecretScopeAclsState, error) { _, _, err := r.DoUpdateWithID(ctx, id, state) return nil, err } diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index be7cb9b3212..b852e225ddb 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -118,7 +118,7 @@ func (r *ResourceSqlWarehouse) DoRead(ctx context.Context, id string) (*SqlWareh } // DoCreate creates the warehouse and returns its id. -func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, engine *Engine, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, engine *StateSaver, config *SqlWarehouseState) (string, *SqlWarehouseRemote, error) { waiter, err := r.client.Warehouses.Create(ctx, config.CreateWarehouseRequest) if err != nil { return "", nil, err @@ -161,7 +161,7 @@ func hasWarehouseChanges(entry *PlanEntry) bool { } // DoUpdate updates the warehouse in place. -func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, _ *Engine, id string, config *SqlWarehouseState, entry *PlanEntry) (*SqlWarehouseRemote, error) { +func (r *ResourceSqlWarehouse) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *SqlWarehouseState, entry *PlanEntry) (*SqlWarehouseRemote, error) { if hasWarehouseChanges(entry) { request := sql.EditWarehouseRequest{ AutoStopMins: config.AutoStopMins, diff --git a/bundle/direct/dresources/engine.go b/bundle/direct/dresources/state_saver.go similarity index 71% rename from bundle/direct/dresources/engine.go rename to bundle/direct/dresources/state_saver.go index 0172f287ff1..439816b002c 100644 --- a/bundle/direct/dresources/engine.go +++ b/bundle/direct/dresources/state_saver.go @@ -11,10 +11,10 @@ import ( "github.com/databricks/cli/libs/structs/structdiff" ) -// Engine provides state persistence to resource implementations. +// StateSaver provides state persistence to resource implementations. // Pass it to DoCreate or DoUpdate to save intermediate state before long-running // wait operations, so the resource is not orphaned if deployment is interrupted. -type Engine struct { +type StateSaver struct { resourceKey string id string stateType reflect.Type @@ -22,15 +22,15 @@ type Engine struct { lastSaved any } -// NewEngine creates an Engine with the given state type and save function. +// NewStateSaver creates an StateSaver with the given state type and save function. // The framework calls this before invoking DoCreate or DoUpdate. -func NewEngine(resourceKey string, stateType reflect.Type, saveFunc func(id string, x any) error) *Engine { - return &Engine{resourceKey: resourceKey, id: "", stateType: stateType, saveFunc: saveFunc, lastSaved: nil} +func NewStateSaver(resourceKey string, stateType reflect.Type, saveFunc func(id string, x any) error) *StateSaver { + return &StateSaver{resourceKey: resourceKey, id: "", stateType: stateType, saveFunc: saveFunc, lastSaved: nil} } -// NewNopEngine creates an Engine that discards all saves. Use in tests. -func NewNopEngine(stateType reflect.Type) *Engine { - return NewEngine("", stateType, func(_ string, _ any) error { return nil }) +// NewNopStateSaver creates an StateSaver that discards all saves. Use in tests. +func NewNopStateSaver(stateType reflect.Type) *StateSaver { + return NewStateSaver("", stateType, func(_ string, _ any) error { return nil }) } // SaveState saves the resource state. id must be the resource's identifier; on @@ -39,7 +39,7 @@ func NewNopEngine(stateType reflect.Type) *Engine { // If the state is identical to what was last saved, the write is skipped. // Failures to persist state are logged but do not abort the deployment — the // resource already exists and aborting would not undo its creation. -func (e *Engine) SaveState(ctx context.Context, id string, x any) { +func (e *StateSaver) SaveState(ctx context.Context, id string, x any) { if e.id == "" { e.id = id } else if e.id != id { diff --git a/bundle/direct/dresources/synced_database_table.go b/bundle/direct/dresources/synced_database_table.go index 7041329ca7d..fe575b37308 100644 --- a/bundle/direct/dresources/synced_database_table.go +++ b/bundle/direct/dresources/synced_database_table.go @@ -24,7 +24,7 @@ func (r *ResourceSyncedDatabaseTable) DoRead(ctx context.Context, name string) ( return r.client.Database.GetSyncedDatabaseTableByName(ctx, name) } -func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, _ *Engine, config *database.SyncedDatabaseTable) (string, *database.SyncedDatabaseTable, error) { +func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, _ *StateSaver, config *database.SyncedDatabaseTable) (string, *database.SyncedDatabaseTable, error) { result, err := r.client.Database.CreateSyncedDatabaseTable(ctx, database.CreateSyncedDatabaseTableRequest{ SyncedTable: *config, }) @@ -34,7 +34,7 @@ func (r *ResourceSyncedDatabaseTable) DoCreate(ctx context.Context, _ *Engine, c return result.Name, nil, nil } -func (r *ResourceSyncedDatabaseTable) DoUpdate(ctx context.Context, _ *Engine, id string, config *database.SyncedDatabaseTable, _ *PlanEntry) (*database.SyncedDatabaseTable, error) { +func (r *ResourceSyncedDatabaseTable) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *database.SyncedDatabaseTable, _ *PlanEntry) (*database.SyncedDatabaseTable, error) { request := database.UpdateSyncedDatabaseTableRequest{ SyncedTable: *config, Name: id, diff --git a/bundle/direct/dresources/vector_search_endpoint.go b/bundle/direct/dresources/vector_search_endpoint.go index 719901b79c3..80fbc601fe8 100644 --- a/bundle/direct/dresources/vector_search_endpoint.go +++ b/bundle/direct/dresources/vector_search_endpoint.go @@ -65,7 +65,7 @@ func (r *ResourceVectorSearchEndpoint) DoRead(ctx context.Context, id string) (* return newVectorSearchEndpointRemote(info), nil } -func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Engine, config *vectorsearch.CreateEndpoint) (string, *VectorSearchEndpointRemote, error) { +func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *StateSaver, config *vectorsearch.CreateEndpoint) (string, *VectorSearchEndpointRemote, error) { _, err := r.client.VectorSearchEndpoints.CreateEndpoint(ctx, *config) if err != nil { return "", nil, err @@ -83,7 +83,7 @@ func (r *ResourceVectorSearchEndpoint) DoCreate(ctx context.Context, engine *Eng return id, newVectorSearchEndpointRemote(info), nil } -func (r *ResourceVectorSearchEndpoint) DoUpdate(ctx context.Context, _ *Engine, id string, config *vectorsearch.CreateEndpoint, entry *PlanEntry) (*VectorSearchEndpointRemote, error) { +func (r *ResourceVectorSearchEndpoint) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *vectorsearch.CreateEndpoint, entry *PlanEntry) (*VectorSearchEndpointRemote, error) { if entry.Changes.HasChange(pathBudgetPolicyId) { _, err := r.client.VectorSearchEndpoints.UpdateEndpointBudgetPolicy(ctx, vectorsearch.PatchEndpointBudgetPolicyRequest{ EndpointName: id, diff --git a/bundle/direct/dresources/vector_search_index.go b/bundle/direct/dresources/vector_search_index.go index ce796bc9f93..7d15f54212a 100644 --- a/bundle/direct/dresources/vector_search_index.go +++ b/bundle/direct/dresources/vector_search_index.go @@ -121,7 +121,7 @@ func (r *ResourceVectorSearchIndex) DoRead(ctx context.Context, id string) (*Vec }, nil } -func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *Engine, config *VectorSearchIndexState) (string, *VectorSearchIndexRemote, error) { +func (r *ResourceVectorSearchIndex) DoCreate(ctx context.Context, engine *StateSaver, config *VectorSearchIndexState) (string, *VectorSearchIndexRemote, error) { _, err := r.client.VectorSearchIndexes.CreateIndex(ctx, config.CreateVectorIndexRequest) if err != nil { return "", nil, err diff --git a/bundle/direct/dresources/volume.go b/bundle/direct/dresources/volume.go index 3291226b12d..4b1608f8947 100644 --- a/bundle/direct/dresources/volume.go +++ b/bundle/direct/dresources/volume.go @@ -42,7 +42,7 @@ func (r *ResourceVolume) DoRead(ctx context.Context, id string) (*catalog.Volume return r.client.Volumes.ReadByName(ctx, id) } -func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, config *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) { +func (r *ResourceVolume) DoCreate(ctx context.Context, _ *StateSaver, config *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) { response, err := r.client.Volumes.Create(ctx, *config) if err != nil { return "", nil, err @@ -50,7 +50,7 @@ func (r *ResourceVolume) DoCreate(ctx context.Context, _ *Engine, config *catalo return response.FullName, response, nil } -func (r *ResourceVolume) DoUpdate(ctx context.Context, _ *Engine, id string, config *catalog.CreateVolumeRequestContent, _ *PlanEntry) (*catalog.VolumeInfo, error) { +func (r *ResourceVolume) DoUpdate(ctx context.Context, _ *StateSaver, id string, config *catalog.CreateVolumeRequestContent, _ *PlanEntry) (*catalog.VolumeInfo, error) { updateRequest := catalog.UpdateVolumeRequestContent{ Comment: config.Comment, Name: id, From e6131bc32fd34a644ab5ff640dca06b0d33c8a5e Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 19:46:33 +0200 Subject: [PATCH 20/21] direct: add SaveStateWith helper; avoid double JSON marshal in StateSaver - SaveStateWith[F any]: type-safe helper that temporarily sets a field to an intermediate value before saving, then restores it. Used to save Published=false before publishDashboard and Lifecycle=nil before warehouse/cluster lifecycle management, so the planner sees a real diff if deployment is interrupted mid-way. - Change saveFunc signature to func(id string, b json.RawMessage) error and add DeploymentState.SaveStateJSON to accept pre-marshaled bytes. StateSaver already marshals x to JSON for dedup; passing those bytes directly avoids a redundant marshal on every save. - Fix lastSaved aliasing: store []byte (JSON snapshot) instead of any pointer. SaveStateWith modifies the config, saves, then restores; a pointer-based lastSaved would point at the restored value, making the subsequent final save look like a no-op and leaving Published=false in the WAL. Co-authored-by: Isaac --- bundle/direct/apply.go | 8 +++--- bundle/direct/dresources/cluster.go | 7 ++--- bundle/direct/dresources/dashboard.go | 16 +++--------- bundle/direct/dresources/sql_warehouse.go | 7 ++--- bundle/direct/dresources/state_saver.go | 32 ++++++++++++++++------- bundle/direct/dstate/state.go | 21 +++++++++------ 6 files changed, 52 insertions(+), 39 deletions(-) diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index f142fce5bef..0687ae6a53a 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -51,8 +51,8 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { - engine := dresources.NewStateSaver(d.ResourceKey, d.Adapter.StateType(), func(id string, x any) error { - return db.SaveState(d.ResourceKey, id, x, d.DependsOn) + engine := dresources.NewStateSaver(d.ResourceKey, d.Adapter.StateType(), func(id string, b json.RawMessage) error { + return db.SaveStateJSON(d.ResourceKey, id, b, d.DependsOn) }) var newID string @@ -127,8 +127,8 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("internal error: DoUpdate not implemented for resource %s", d.ResourceKey) } - engine := dresources.NewStateSaver(d.ResourceKey, d.Adapter.StateType(), func(_ string, x any) error { - return db.SaveState(d.ResourceKey, id, x, d.DependsOn) + engine := dresources.NewStateSaver(d.ResourceKey, d.Adapter.StateType(), func(_ string, b json.RawMessage) error { + return db.SaveStateJSON(d.ResourceKey, id, b, d.DependsOn) }) remoteState, err := retryOnTransient(ctx, func() (any, error) { return d.Adapter.DoUpdate(ctx, engine, id, newState, planEntry) diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index 4bc0675c8eb..40b215f99f4 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -154,9 +154,10 @@ func (r *ResourceCluster) DoCreate(ctx context.Context, engine *StateSaver, conf } id := wait.ClusterId - // Save state immediately after the cluster is created so it is not orphaned - // if the subsequent wait or terminate is interrupted. - engine.SaveState(ctx, id, config) + // Save with Lifecycle=nil: cluster exists but lifecycle has not been applied yet + // (it always starts RUNNING). If the subsequent wait or stop is interrupted, the + // planner sees a real diff (nil→desired) and re-applies lifecycle on the next deploy. + SaveStateWith(engine, ctx, id, config, &config.Lifecycle, (*StateLifecycle)(nil)) // Always wait for RUNNING first: clusters start in PENDING state and must be polled. _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index 94adef397be..a47a2436917 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -298,14 +298,9 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *StateSaver, co // Persist the etag in state. config.Etag = createResp.Etag - // Save state with Published=false: the dashboard exists as a draft; publish - // has not succeeded yet. Using Published=false ensures the planner sees a - // real diff (false→true) if publish is interrupted, triggering a DoUpdate - // on the next deploy instead of silently treating the resource as up-to-date. - savedPublished := config.Published - config.Published = false - engine.SaveState(ctx, createResp.DashboardId, config) - config.Published = savedPublished + // Save with Published=false: draft exists, publish not yet done. Ensures the + // planner sees a real diff (false→true) if publish is interrupted. + SaveStateWith(engine, ctx, createResp.DashboardId, config, &config.Published, false) var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). @@ -344,10 +339,7 @@ func (r *ResourceDashboard) DoUpdate(ctx context.Context, engine *StateSaver, id // sync (a stale etag would make the next Update fail with a conflict) and records // published=false so the planner re-publishes on the next deploy. config.Etag = updateResp.Etag - savedPublished := config.Published - config.Published = false - engine.SaveState(ctx, id, config) - config.Published = savedPublished + SaveStateWith(engine, ctx, id, config, &config.Published, false) var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index b852e225ddb..fb079312395 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -125,9 +125,10 @@ func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, engine *StateSaver, } id := waiter.Id - // Save state immediately after the warehouse is created so it is not orphaned - // if the subsequent wait or stop is interrupted. - engine.SaveState(ctx, id, config) + // Save with Lifecycle=nil: warehouse exists but lifecycle has not been applied yet + // (it always starts RUNNING). If the subsequent wait or stop is interrupted, the + // planner sees a real diff (nil→desired) and re-applies lifecycle on the next deploy. + SaveStateWith(engine, ctx, id, config, &config.Lifecycle, (*StateLifecycle)(nil)) if config.Lifecycle == nil || config.Lifecycle.Started == nil { return id, nil, nil diff --git a/bundle/direct/dresources/state_saver.go b/bundle/direct/dresources/state_saver.go index 439816b002c..8d7d9e0d71d 100644 --- a/bundle/direct/dresources/state_saver.go +++ b/bundle/direct/dresources/state_saver.go @@ -1,6 +1,7 @@ package dresources import ( + "bytes" "context" "encoding/json" "fmt" @@ -8,7 +9,6 @@ import ( "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/logdiag" - "github.com/databricks/cli/libs/structs/structdiff" ) // StateSaver provides state persistence to resource implementations. @@ -18,19 +18,33 @@ type StateSaver struct { resourceKey string id string stateType reflect.Type - saveFunc func(id string, x any) error - lastSaved any + saveFunc func(id string, b json.RawMessage) error + lastSaved []byte // JSON snapshot; stored by value to avoid aliasing with the live config pointer } // NewStateSaver creates an StateSaver with the given state type and save function. // The framework calls this before invoking DoCreate or DoUpdate. -func NewStateSaver(resourceKey string, stateType reflect.Type, saveFunc func(id string, x any) error) *StateSaver { +func NewStateSaver(resourceKey string, stateType reflect.Type, saveFunc func(id string, b json.RawMessage) error) *StateSaver { return &StateSaver{resourceKey: resourceKey, id: "", stateType: stateType, saveFunc: saveFunc, lastSaved: nil} } // NewNopStateSaver creates an StateSaver that discards all saves. Use in tests. func NewNopStateSaver(stateType reflect.Type) *StateSaver { - return NewStateSaver("", stateType, func(_ string, _ any) error { return nil }) + return NewStateSaver("", stateType, func(_ string, _ json.RawMessage) error { return nil }) +} + +// SaveStateWith saves state with field temporarily set to value, then restores it. +// This is useful when the actual current state of a field differs from its desired +// value in config — e.g. saving started=true before a stop, or published=false +// before a publish, so the planner sees a real diff if the operation is interrupted. +// +// field must be a pointer to a field within config. Type safety is enforced by the +// compiler: field and value must have the same type F. +func SaveStateWith[F any](s *StateSaver, ctx context.Context, id string, config any, field *F, value F) { + saved := *field + *field = value + s.SaveState(ctx, id, config) + *field = saved } // SaveState saves the resource state. id must be the resource's identifier; on @@ -49,19 +63,19 @@ func (e *StateSaver) SaveState(ctx context.Context, id string, x any) { if xt != e.stateType { panic(fmt.Sprintf("SaveState: type mismatch: expected %v, got %v", e.stateType, xt)) } - if e.lastSaved != nil && structdiff.IsEqual(e.lastSaved, x) { + b, _ := json.Marshal(x) + if bytes.Equal(e.lastSaved, b) { log.Debugf(ctx, "SaveState: %s id=%s: skipping, state unchanged", e.resourceKey, id) return } - b, _ := json.Marshal(x) preview := string(b) if len(preview) > 100 { preview = preview[:100] } log.Debugf(ctx, "SaveState: %s id=%s %d bytes: %s", e.resourceKey, id, len(b), preview) - if err := e.saveFunc(e.id, x); err != nil { + if err := e.saveFunc(e.id, b); err != nil { logdiag.LogError(ctx, err) return } - e.lastSaved = x + e.lastSaved = b } diff --git a/bundle/direct/dstate/state.go b/bundle/direct/dstate/state.go index 5b2a70adbb3..f0fa001b9a0 100644 --- a/bundle/direct/dstate/state.go +++ b/bundle/direct/dstate/state.go @@ -81,6 +81,17 @@ func NewDatabase(lineage string, serial int) Database { } func (db *DeploymentState) SaveState(key, newID string, state any, dependsOn []deployplan.DependsOnEntry) error { + // don't indent so that every WAL entry remains on a single line + b, err := json.Marshal(state) + if err != nil { + return err + } + return db.SaveStateJSON(key, newID, b, dependsOn) +} + +// SaveStateJSON saves pre-marshaled JSON state, avoiding a redundant marshal when +// the caller already holds the serialized bytes (e.g. the StateSaver dedup path). +func (db *DeploymentState) SaveStateJSON(key, newID string, state json.RawMessage, dependsOn []deployplan.DependsOnEntry) error { db.AssertOpenedForWrite() db.mu.Lock() defer db.mu.Unlock() @@ -89,19 +100,13 @@ func (db *DeploymentState) SaveState(key, newID string, state any, dependsOn []d db.Data.State = make(map[string]ResourceEntry) } - // don't indent so that every WAL entry remains on a single line - jsonMessage, err := json.Marshal(state) - if err != nil { - return err - } - entry := ResourceEntry{ ID: newID, - State: json.RawMessage(jsonMessage), + State: state, DependsOn: dependsOn, } - err = appendJSONLine(db.walFile, WALEntry{Key: key, Value: &entry}) + err := appendJSONLine(db.walFile, WALEntry{Key: key, Value: &entry}) if err == nil { db.stateIDs[key] = newID } From 79818c6794e5a28befc075f259abc67d85f7e433 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 5 Jun 2026 20:15:26 +0200 Subject: [PATCH 21/21] direct: fix SaveStateWith parameter order (ctx first) Co-authored-by: Isaac --- bundle/direct/dresources/cluster.go | 2 +- bundle/direct/dresources/dashboard.go | 4 ++-- bundle/direct/dresources/sql_warehouse.go | 2 +- bundle/direct/dresources/state_saver.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bundle/direct/dresources/cluster.go b/bundle/direct/dresources/cluster.go index 40b215f99f4..1f92b5d42d8 100644 --- a/bundle/direct/dresources/cluster.go +++ b/bundle/direct/dresources/cluster.go @@ -157,7 +157,7 @@ func (r *ResourceCluster) DoCreate(ctx context.Context, engine *StateSaver, conf // Save with Lifecycle=nil: cluster exists but lifecycle has not been applied yet // (it always starts RUNNING). If the subsequent wait or stop is interrupted, the // planner sees a real diff (nil→desired) and re-applies lifecycle on the next deploy. - SaveStateWith(engine, ctx, id, config, &config.Lifecycle, (*StateLifecycle)(nil)) + SaveStateWith(ctx, engine, id, config, &config.Lifecycle, (*StateLifecycle)(nil)) // Always wait for RUNNING first: clusters start in PENDING state and must be polled. _, err = r.client.Clusters.WaitGetClusterRunning(ctx, id, 15*time.Minute, nil) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index a47a2436917..83ba060ad68 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -300,7 +300,7 @@ func (r *ResourceDashboard) DoCreate(ctx context.Context, engine *StateSaver, co config.Etag = createResp.Etag // Save with Published=false: draft exists, publish not yet done. Ensures the // planner sees a real diff (false→true) if publish is interrupted. - SaveStateWith(engine, ctx, createResp.DashboardId, config, &config.Published, false) + SaveStateWith(ctx, engine, createResp.DashboardId, config, &config.Published, false) var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). @@ -339,7 +339,7 @@ func (r *ResourceDashboard) DoUpdate(ctx context.Context, engine *StateSaver, id // sync (a stale etag would make the next Update fail with a conflict) and records // published=false so the planner re-publishes on the next deploy. config.Etag = updateResp.Etag - SaveStateWith(engine, ctx, id, config, &config.Published, false) + SaveStateWith(ctx, engine, id, config, &config.Published, false) var publishResp *dashboards.PublishedDashboard // Note, today config.Published is always true (we do not have this field in input config). diff --git a/bundle/direct/dresources/sql_warehouse.go b/bundle/direct/dresources/sql_warehouse.go index fb079312395..e91975d0dd2 100644 --- a/bundle/direct/dresources/sql_warehouse.go +++ b/bundle/direct/dresources/sql_warehouse.go @@ -128,7 +128,7 @@ func (r *ResourceSqlWarehouse) DoCreate(ctx context.Context, engine *StateSaver, // Save with Lifecycle=nil: warehouse exists but lifecycle has not been applied yet // (it always starts RUNNING). If the subsequent wait or stop is interrupted, the // planner sees a real diff (nil→desired) and re-applies lifecycle on the next deploy. - SaveStateWith(engine, ctx, id, config, &config.Lifecycle, (*StateLifecycle)(nil)) + SaveStateWith(ctx, engine, id, config, &config.Lifecycle, (*StateLifecycle)(nil)) if config.Lifecycle == nil || config.Lifecycle.Started == nil { return id, nil, nil diff --git a/bundle/direct/dresources/state_saver.go b/bundle/direct/dresources/state_saver.go index 8d7d9e0d71d..3d4f0656a9c 100644 --- a/bundle/direct/dresources/state_saver.go +++ b/bundle/direct/dresources/state_saver.go @@ -40,7 +40,7 @@ func NewNopStateSaver(stateType reflect.Type) *StateSaver { // // field must be a pointer to a field within config. Type safety is enforced by the // compiler: field and value must have the same type F. -func SaveStateWith[F any](s *StateSaver, ctx context.Context, id string, config any, field *F, value F) { +func SaveStateWith[F any](ctx context.Context, s *StateSaver, id string, config any, field *F, value F) { saved := *field *field = value s.SaveState(ctx, id, config)