28 changes: 28 additions & 0 deletions backend/app/alembic/versions/058_make_llm_call_fields_nullable.py
@@ -0,0 +1,28 @@
"""make llm_call input_type, provider, model nullable

Revision ID: 058
Revises: 057
Create Date: 2026-05-11 00:00:00.000000

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "058"
down_revision = "057"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.alter_column("llm_call", "input_type", nullable=True)
    op.alter_column("llm_call", "provider", nullable=True)
    op.alter_column("llm_call", "model", nullable=True)


def downgrade() -> None:
    op.alter_column("llm_call", "model", nullable=False)
    op.alter_column("llm_call", "provider", nullable=False)
    op.alter_column("llm_call", "input_type", nullable=False)
Comment on lines +25 to +28

coderabbitai (bot) commented on May 12, 2026


⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Downgrade can fail once NULL rows exist.

Setting these columns back to nullable=False without backfilling will fail on rollback if any row has NULL in input_type, provider, or model.

Suggested migration hardening
 def downgrade() -> None:
+    op.execute(
+        """
+        UPDATE llm_call
+        SET input_type = COALESCE(input_type, 'text'),
+            provider = COALESCE(provider, 'unknown'),
+            model = COALESCE(model, 'unknown')
+        """
+    )
     op.alter_column("llm_call", "model", nullable=False)
     op.alter_column("llm_call", "provider", nullable=False)
     op.alter_column("llm_call", "input_type", nullable=False)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/alembic/versions/058_make_llm_call_fields_nullable.py` around lines 25-28: downgrade() sets llm_call.model, llm_call.provider, and llm_call.input_type back to nullable=False, which will fail if any rows contain NULL. Modify downgrade() to first backfill NULL values for these columns (or abort with a clear error) before calling op.alter_column: for example, run UPDATE statements against the "llm_call" table to set sensible defaults for model, provider, and input_type, or raise a RuntimeError if NULLs exist. Then call op.alter_column("llm_call", "model", nullable=False), op.alter_column("llm_call", "provider", nullable=False), and op.alter_column("llm_call", "input_type", nullable=False).

Prajna1999 (Collaborator, Author) commented


false alarm

coderabbitai (bot) replied

@Prajna1999, understood! If downgrades are only expected to run in controlled environments (e.g., dev/staging with no real NULL rows), the concern wouldn't apply in practice. Thanks for the clarification.

99 changes: 98 additions & 1 deletion backend/app/crud/llm.py
@@ -19,7 +19,7 @@
logger = logging.getLogger(__name__)


-def serialize_input(query_input: QueryInput | str) -> str:
+def serialize_input(query_input: QueryInput | str | list) -> str:
     """Serialize query input for database storage.
 
     For text: stores the actual content value
@@ -279,3 +279,100 @@ def get_llm_calls_by_job_id(
    )

    return list(session.exec(statement).all())


def get_llm_call_by_job_id(session: Session, job_id: UUID) -> LlmCall | None:
    """Return the single active LlmCall for a standalone job (no chain_id)."""
    statement = (
        select(LlmCall)
        .where(
            LlmCall.job_id == job_id,
            LlmCall.chain_id.is_(None),
            LlmCall.deleted_at.is_(None),
        )
        .order_by(LlmCall.created_at.desc())
    )
    return session.exec(statement).first()


def create_llm_call_pending(
    session: Session,
    *,
    job_id: UUID,
    project_id: int,
    organization_id: int,
    request: LLMCallRequest,
    chain_id: UUID | None = None,
) -> LlmCall:
    """Create a minimal LlmCall row at job-creation time.

    Only fields available before config resolution are populated.
    input_type, output_type, provider, model, content, usage stay NULL
    and are filled in by the Celery task via update_llm_call_resolved_fields().
    """
    config_dict: dict[str, Any] | None = None
    if request.config.is_stored_config:
        config_dict = {
            "config_id": str(request.config.id),
            "config_version": request.config.version,
        }

    conversation_id = None
    auto_create = None
    if request.query.conversation:
        conversation_id = request.query.conversation.id
        auto_create = request.query.conversation.auto_create

    db_llm_call = LlmCall(
        job_id=job_id,
        project_id=project_id,
        organization_id=organization_id,
        chain_id=chain_id,
        input=serialize_input(request.query.input),
        conversation_id=conversation_id,
        auto_create=auto_create,
        config=config_dict,
    )

    session.add(db_llm_call)
    session.commit()
    session.refresh(db_llm_call)

    logger.info(
        f"[create_llm_call_pending] Created pending LLM call id={db_llm_call.id}, job_id={job_id}"
    )

    return db_llm_call


def update_llm_call_resolved_fields(
    session: Session,
    *,
    llm_call_id: UUID,
    input_type: str,
    output_type: str | None,
    provider: str | None,
    model: str | None,
    config: dict[str, Any],
) -> LlmCall:
    """Populate config-resolved fields on a pending LlmCall row."""
    db_llm_call = session.get(LlmCall, llm_call_id)
    if not db_llm_call:
        raise ValueError(f"LLM call not found with id={llm_call_id}")

    db_llm_call.input_type = input_type
    db_llm_call.output_type = output_type
    db_llm_call.provider = provider
    db_llm_call.model = model
    db_llm_call.config = config
    db_llm_call.updated_at = now()

    session.add(db_llm_call)
    session.commit()
    session.refresh(db_llm_call)

    logger.info(
        f"[update_llm_call_resolved_fields] Updated resolved fields | llm_call_id={llm_call_id}, provider={provider}, model={model}"
    )

    return db_llm_call
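Taken together, these two helpers split persistence across the request/worker boundary: the API route writes a pending row, and the Celery task completes it after config resolution. A minimal sketch of that wiring (the function names, the `resolved` object, and the import paths are assumptions, not part of this diff):

from uuid import UUID

from sqlmodel import Session

from app.crud.llm import create_llm_call_pending, update_llm_call_resolved_fields
from app.models.llm.request import LLMCallRequest


def start_llm_job(
    session: Session,
    *,
    job_id: UUID,
    project_id: int,
    organization_id: int,
    request: LLMCallRequest,
) -> UUID:
    """Phase 1 (API route): persist what is known before config resolution."""
    pending = create_llm_call_pending(
        session,
        job_id=job_id,
        project_id=project_id,
        organization_id=organization_id,
        request=request,
    )
    return pending.id


def finish_llm_job(session: Session, llm_call_id: UUID, resolved) -> None:
    """Phase 2 (Celery task): fill the columns left NULL in phase 1.

    `resolved` stands in for whatever the task's config-resolution step
    produces; its attribute names here are assumptions.
    """
    update_llm_call_resolved_fields(
        session,
        llm_call_id=llm_call_id,
        input_type=resolved.input_type,
        output_type=resolved.output_type,
        provider=resolved.provider,
        model=resolved.model,
        config=resolved.config,
    )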
8 changes: 7 additions & 1 deletion backend/app/crud/llm_chain.py
@@ -2,7 +2,7 @@
from typing import Any
from uuid import UUID

-from sqlmodel import Session
+from sqlmodel import Session, select

from app.core.util import now
from app.models.llm.request import ChainStatus, LlmChain
@@ -144,3 +144,9 @@ def update_llm_chain_block_completed(
f"llm_call_id={llm_call_id}"
)
return db_chain


def get_llm_chain_by_job_id(session: Session, job_id: UUID) -> LlmChain | None:
"""Return the LlmChain record associated with the given job."""
statement = select(LlmChain).where(LlmChain.job_id == job_id)
return session.exec(statement).first()
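One plausible consumer of these two lookups (a sketch, not an endpoint in this diff; import paths assumed) is a job-status helper that checks for a chain first and falls back to the standalone call:

from uuid import UUID

from sqlmodel import Session

from app.crud.llm import get_llm_call_by_job_id
from app.crud.llm_chain import get_llm_chain_by_job_id
from app.models.llm.request import LlmCall, LlmChain


def get_job_record(session: Session, job_id: UUID) -> LlmChain | LlmCall | None:
    # Chained jobs have an LlmChain row; standalone jobs have a single
    # LlmCall whose chain_id IS NULL (see get_llm_call_by_job_id above).
    chain = get_llm_chain_by_job_id(session, job_id)
    if chain is not None:
        return chain
    return get_llm_call_by_job_id(session, job_id)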
24 changes: 13 additions & 11 deletions backend/app/models/llm/request.py
@@ -537,11 +537,11 @@ class LlmCall(SQLModel, table=True):
     )
 
     # NOTE: image, pdf, multimodal are internal labels stored in the table not user facing.
-    input_type: Literal["text", "audio", "image", "pdf", "multimodal"] = Field(
-        ...,
+    input_type: Literal["text", "audio", "image", "pdf", "multimodal"] | None = Field(
+        default=None,
         sa_column=sa.Column(
             sa.String,
-            nullable=False,
+            nullable=True,
             comment="Input type: text, audio, image, pdf, multimodal",
         ),
     )
@@ -556,20 +556,22 @@
     )
 
     # Provider and model info
-    provider: str = Field(
-        ...,
+    provider: str | None = Field(
+        default=None,
         sa_column=sa.Column(
             sa.String,
-            nullable=False,
+            nullable=True,
             comment="AI provider as sent by user (e.g openai, -native, google)",
         ),
     )
 
-    model: str = Field(
-        ...,
-        sa_column_kwargs={
-            "comment": "Specific model used e.g. 'gpt-4o', 'gemini-2.5-pro'"
-        },
+    model: str | None = Field(
+        default=None,
+        sa_column=sa.Column(
+            sa.String,
+            nullable=True,
+            comment="Specific model used e.g. 'gpt-4o', 'gemini-2.5-pro'",
+        ),
     )
 
     # Response fields