Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion schemas/dab.draft.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,8 @@
"name": { "type": "string", "description": "Parameter name" },
"required": { "$ref": "#/$defs/boolean-or-string", "description": "Is parameter required" },
"default": { "type": ["string", "number", "boolean", "null"], "description": "Default value" },
"description": { "type": "string", "description": "Parameter description. Since descriptions for multiple parameters are provided as a comma-separated string, individual parameter descriptions must not contain a comma (',')." }
"description": { "type": "string", "description": "Parameter description. Since descriptions for multiple parameters are provided as a comma-separated string, individual parameter descriptions must not contain a comma (',')." },
"embed": { "type": "boolean", "description": "When true, the parameter text is automatically converted to an embedding vector via the configured embedding service before being passed to the stored procedure. Requires runtime.embeddings to be configured. Only valid on stored-procedure entities. The target stored procedure parameter must be declared as VECTOR(N) — DAB cannot detect non-VECTOR misconfigurations at startup due to SQL Server metadata limitations.", "default": false }
}
}
}
Expand Down
14 changes: 14 additions & 0 deletions src/Config/ObjectModel/ParameterMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,19 @@ public class ParameterMetadata
/// Gets or sets the default value of the parameter, if any.
/// </summary>
public string? Default { get; set; }

/// <summary>
/// When true, the parameter value (text) is automatically embedded via the
/// EmbeddingService and the resulting vector is passed to the stored procedure.
/// Only valid on stored-procedure entities when runtime.embeddings is configured.
///
/// IMPORTANT: The target stored procedure parameter must be declared as VECTOR(N).
/// SQL Server's metadata system reports VECTOR(N) and varbinary indistinguishably,
/// so DAB cannot detect this misconfiguration at startup. If embed:true is applied
/// to a non-VECTOR parameter (e.g., NVARCHAR or VARBINARY), the request will fail
/// at runtime with a SQL error or return semantically incorrect results.
/// It is the developer's responsibility to ensure the sproc parameter is VECTOR(N).
/// </summary>
public bool Embed { get; set; }
}
}
4 changes: 4 additions & 0 deletions src/Core/Azure.DataApiBuilder.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@
<ProjectReference Include="..\Service.GraphQLBuilder\Azure.DataApiBuilder.Service.GraphQLBuilder.csproj" />
</ItemGroup>

<ItemGroup>
<InternalsVisibleTo Include="Azure.DataApiBuilder.Service.Tests" />
</ItemGroup>

<ItemGroup>
<None Include="..\..\nuget\nuget_core\README.md" Pack="true" PackagePath="\" />
<None Include="..\..\nuget\nuget_icon.png" Pack="true" PackagePath="\" />
Expand Down
148 changes: 148 additions & 0 deletions src/Core/Configurations/RuntimeConfigValidator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ public void ValidateConfigProperties()
ValidateAzureLogAnalyticsAuth(runtimeConfig);
ValidateFileSinkPath(runtimeConfig);
ValidateEmbeddingsOptions(runtimeConfig);
ValidateEmbedParameters(runtimeConfig);
}

/// <summary>
Expand Down Expand Up @@ -421,7 +422,154 @@ public void ValidateEmbeddingsOptions(RuntimeConfig runtimeConfig)
subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError));
}
}
}

/// <summary>
/// Validates every entity parameter that is flagged with <c>embed: true</c>.
/// A flagged parameter is reported as a configuration error when:
/// <list type="bullet">
/// <item><description>the entity's data source is not Azure SQL / SQL Server (MSSQL);</description></item>
/// <item><description>the entity is not a stored procedure;</description></item>
/// <item><description>runtime.embeddings is missing or not enabled;</description></item>
/// <item><description>the parameter also declares a default value.</description></item>
/// </list>
/// Each violation is forwarded to <c>HandleOrRecordException</c> as a
/// <c>DataApiBuilderException</c> with a ConfigValidationError sub-status.
/// </summary>
/// <param name="runtimeConfig">The runtime configuration whose entities are inspected.</param>
/// <remarks>
/// Internal (rather than private) to allow direct unit testing via the
/// <c>InternalsVisibleTo</c> attribute on Azure.DataApiBuilder.Core. Callers outside
/// the assembly should still go through <see cref="ValidateConfigProperties"/>.
/// </remarks>
internal void ValidateEmbedParameters(RuntimeConfig runtimeConfig)
{
    // Evaluated once: true only when the runtime.embeddings section exists and is enabled.
    bool embeddingsConfigured = runtimeConfig.Runtime?.Embeddings is { Enabled: true };

    foreach ((string entityName, Entity entity) in runtimeConfig.Entities)
    {
        // Entities without a parameter list, or without any embed-flagged parameter,
        // need no further work (and no data-source lookup).
        ParameterMetadata[]? embedParams = entity.Source.Parameters?.Where(p => p.Embed).ToArray();
        if (embedParams is null || embedParams.Length == 0)
        {
            continue;
        }

        // Entity-scoped facts, resolved once per entity rather than once per parameter.
        DataSource entityDataSource = runtimeConfig.GetDataSourceFromEntityName(entityName);
        bool isMsSql = entityDataSource.DatabaseType == DatabaseType.MSSQL;
        bool isStoredProcedure = entity.Source.Type is EntitySourceType.StoredProcedure;

        foreach (ParameterMetadata param in embedParams)
        {
            // Rule 0: the Byte[] -> String metadata type override exists only in
            // MsSqlMetadataProvider, so other database types would fail at request time
            // with a confusing type error. Reject them during validation instead.
            // TODO: Extend to PostgreSQL/MySQL once their metadata providers grow embed-aware type-override logic.
            if (!isMsSql)
            {
                ReportViolation($"Entity '{entityName}': parameter '{param.Name}' has 'embed: true' but the data source type is '{entityDataSource.DatabaseType}'. The embed feature is currently only supported for Azure SQL / SQL Server.");
            }

            // Rule 1: only stored procedures take user-supplied parameters that are
            // passed to SQL, so embed has no meaning on tables or views.
            if (!isStoredProcedure)
            {
                ReportViolation($"Entity '{entityName}': parameter '{param.Name}' has 'embed: true' but is only valid on stored-procedure entities.");
            }

            // Rule 2: text cannot be turned into a vector without an enabled embedding service.
            if (!embeddingsConfigured)
            {
                ReportViolation($"Entity '{entityName}': parameter '{param.Name}' has 'embed: true' but runtime.embeddings is not configured or not enabled.");
            }

            // Rule 3: an embed parameter may not carry a default value.
            //
            // An embed parameter is the caller's own text (typically a semantic-search
            // query). A default would have the server invent that input whenever the
            // client omits it, which is not a meaningful fallback.
            //
            // Supporting it would also be non-trivial:
            //   - GraphQL defaults are baked into the schema at startup as typed literals
            //     (GraphQLStoredProcedureBuilder.ConvertValueToGraphQLType); there is no
            //     VECTOR literal, and the text would show up in introspection as a
            //     misleading default.
            //   - REST/MCP defaults are injected as plain text and would be re-embedded by
            //     ParameterEmbeddingHelper on every request, a hidden per-request cost.
            //   - Embedding the default once at startup would tie startup to the embedding
            //     provider's network availability.
            //
            // Without a default, a missing value still yields a clear client error:
            //   - null or "" -> 400 BadRequest from ParameterEmbeddingHelper
            //     ("the provided text is empty or whitespace").
            //   - field omitted -> 400 DatabaseInputError parsed by MsSqlDbExceptionParser
            //     ("expects parameter ..., which was not supplied").
            //
            // Defaults on non-embed parameters of the same stored procedure are unaffected.
            if (param.Default is not null)
            {
                ReportViolation($"Entity '{entityName}': parameter '{param.Name}' has both 'embed: true' and a 'default' value. Embed parameters cannot have default values.");
            }
        }
    }

    // Wraps a violation message in the exception shape shared by all embed rules
    // and hands it to the validator's common error sink.
    void ReportViolation(string message)
    {
        HandleOrRecordException(new DataApiBuilderException(
            message: message,
            statusCode: HttpStatusCode.ServiceUnavailable,
            subStatusCode: DataApiBuilderException.SubStatusCodes.ConfigValidationError));
    }
}

/// <summary>
Expand Down
17 changes: 7 additions & 10 deletions src/Core/Resolvers/Factories/MutationEngineFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Azure.DataApiBuilder.Core.Configurations;
using Azure.DataApiBuilder.Core.Models;
using Azure.DataApiBuilder.Core.Services.MetadataProviders;
using Azure.DataApiBuilder.Core.Services.Embeddings;
using Azure.DataApiBuilder.Service.Exceptions;
using Microsoft.AspNetCore.Http;
using static Azure.DataApiBuilder.Config.DabConfigEvents;
Expand All @@ -29,18 +30,11 @@ public class MutationEngineFactory : IMutationEngineFactory
private readonly IHttpContextAccessor _httpContextAccessor;
private readonly IAuthorizationResolver _authorizationResolver;
private readonly GQLFilterParser _gQLFilterParser;
private readonly IEmbeddingService? _embeddingService;

/// <summary>
/// Initializes a new instance of the <see cref="MutationEngineFactory"/> class.
/// </summary>
/// <param name="runtimeConfigProvider">runtimeConfigProvider.</param>
/// <param name="queryManagerFactory">queryManagerFactory</param>
/// <param name="metadataProviderFactory">metadataProviderFactory.</param>
/// <param name="cosmosClientProvider">cosmosClientProvider</param>
/// <param name="queryEngineFactory">queryEngineFactory.</param>
/// <param name="httpContextAccessor">httpContextAccessor.</param>
/// <param name="authorizationResolver">authorizationResolver.</param>
/// <param name="gQLFilterParser">GqlFilterParser.</param>
public MutationEngineFactory(RuntimeConfigProvider runtimeConfigProvider,
IAbstractQueryManagerFactory queryManagerFactory,
IMetadataProviderFactory metadataProviderFactory,
Expand All @@ -49,7 +43,8 @@ public MutationEngineFactory(RuntimeConfigProvider runtimeConfigProvider,
IHttpContextAccessor httpContextAccessor,
IAuthorizationResolver authorizationResolver,
GQLFilterParser gQLFilterParser,
HotReloadEventHandler<HotReloadEventArgs>? handler)
HotReloadEventHandler<HotReloadEventArgs>? handler,
IEmbeddingService? embeddingService = null)

{
handler?.Subscribe(MUTATION_ENGINE_FACTORY_ON_CONFIG_CHANGED, OnConfigChanged);
Expand All @@ -61,6 +56,7 @@ public MutationEngineFactory(RuntimeConfigProvider runtimeConfigProvider,
_queryEngineFactory = queryEngineFactory;
_runtimeConfigProvider = runtimeConfigProvider;
_gQLFilterParser = gQLFilterParser;
_embeddingService = embeddingService;
_mutationEngines = new Dictionary<DatabaseType, IMutationEngine>();
ConfigureMutationEngines();
}
Expand All @@ -78,7 +74,8 @@ private void ConfigureMutationEngines()
_authorizationResolver,
_gQLFilterParser,
_httpContextAccessor,
_runtimeConfigProvider);
_runtimeConfigProvider,
_embeddingService);
_mutationEngines.Add(DatabaseType.MySQL, mutationEngine);
_mutationEngines.Add(DatabaseType.MSSQL, mutationEngine);
_mutationEngines.Add(DatabaseType.PostgreSQL, mutationEngine);
Expand Down
9 changes: 7 additions & 2 deletions src/Core/Resolvers/Factories/QueryEngineFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using Azure.DataApiBuilder.Core.Models;
using Azure.DataApiBuilder.Core.Services.Cache;
using Azure.DataApiBuilder.Core.Services.MetadataProviders;
using Azure.DataApiBuilder.Core.Services.Embeddings;
using Azure.DataApiBuilder.Service.Exceptions;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
Expand All @@ -33,6 +34,7 @@ public class QueryEngineFactory : IQueryEngineFactory
private readonly GQLFilterParser _gQLFilterParser;
private readonly DabCacheService _cache;
private readonly ILogger<IQueryEngine> _logger;
private readonly IEmbeddingService? _embeddingService;

/// <inheritdoc/>
public QueryEngineFactory(RuntimeConfigProvider runtimeConfigProvider,
Expand All @@ -44,7 +46,8 @@ public QueryEngineFactory(RuntimeConfigProvider runtimeConfigProvider,
GQLFilterParser gQLFilterParser,
ILogger<IQueryEngine> logger,
DabCacheService cache,
HotReloadEventHandler<HotReloadEventArgs>? handler)
HotReloadEventHandler<HotReloadEventArgs>? handler,
IEmbeddingService? embeddingService = null)
{
handler?.Subscribe(QUERY_ENGINE_FACTORY_ON_CONFIG_CHANGED, OnConfigChanged);
_queryEngines = new Dictionary<DatabaseType, IQueryEngine>();
Expand All @@ -57,6 +60,7 @@ public QueryEngineFactory(RuntimeConfigProvider runtimeConfigProvider,
_gQLFilterParser = gQLFilterParser;
_cache = cache;
_logger = logger;
_embeddingService = embeddingService;

ConfigureQueryEngines();
}
Expand All @@ -75,7 +79,8 @@ public void ConfigureQueryEngines()
_gQLFilterParser,
_logger,
_runtimeConfigProvider,
_cache);
_cache,
_embeddingService);
_queryEngines.Add(DatabaseType.MSSQL, queryEngine);
_queryEngines.Add(DatabaseType.MySQL, queryEngine);
_queryEngines.Add(DatabaseType.PostgreSQL, queryEngine);
Expand Down
Loading
Loading