-
Notifications
You must be signed in to change notification settings - Fork 55
feat: replace skipDuplicates with OnDuplicate enum (Fail/Skip/Upsert) #861
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4a2715c
306cd1e
7d2458f
399e83c
9fcb94e
4dd3ef6
76bda6a
a42f4cb
b24ece8
14a8a3a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| use Utopia\Database\Exception\Timeout as TimeoutException; | ||
| use Utopia\Database\Exception\Truncate as TruncateException; | ||
| use Utopia\Database\Helpers\ID; | ||
| use Utopia\Database\OnDuplicate; | ||
| use Utopia\Database\Operator; | ||
| use Utopia\Database\Query; | ||
|
|
||
|
|
@@ -2376,4 +2377,33 @@ public function getSupportForTTLIndexes(): bool | |
| { | ||
| return false; | ||
| } | ||
|
|
||
| /** | ||
| * MariaDB/MySQL Upsert: append `ON DUPLICATE KEY UPDATE col = VALUES(col), ...` | ||
| * so the INSERT replaces the matched row's columns instead of throwing. | ||
| * | ||
| * @param array<string> $columns | ||
| */ | ||
| protected function getInsertSuffix(string $table, array $columns = []): string | ||
| { | ||
| if ($this->onDuplicate !== OnDuplicate::Upsert || empty($columns)) { | ||
| return ''; | ||
| } | ||
|
|
||
| $assignments = []; | ||
| foreach ($columns as $col) { | ||
| // Skip the primary unique key (_uid) — no need to "update" what matched. | ||
| // Safe to skip _id too since it's auto-increment in INSERT and untouched on update. | ||
| if (\in_array($col, ['`_uid`', '`_id`'], true)) { | ||
| continue; | ||
| } | ||
| $assignments[] = "{$col} = VALUES({$col})"; | ||
| } | ||
|
|
||
| if (empty($assignments)) { | ||
| return ''; | ||
| } | ||
|
|
||
| return 'ON DUPLICATE KEY UPDATE ' . \implode(', ', $assignments); | ||
| } | ||
|
Comment on lines
+2381
to
+2408
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Find all callers and check column formatting
rg -nP --type=php -C3 '\bgetInsertSuffix\s*\('
# Look at how columns are assembled at call sites
ast-grep --pattern $'$_->getInsertSuffix($$$)'
# Verify there are existing assertions locking _createdAt immutability across upsert
rg -nP --type=php -C2 '_createdAt' tests | rg -i 'upsert|on.?duplicate'Repository: utopia-php/database Length of output: 2686 🏁 Script executed: #!/bin/bash
# 1. Get full Postgres::getInsertSuffix() implementation
echo "=== Postgres::getInsertSuffix() ==="
sed -n '2389,2420p' src/Database/Adapter/Postgres.php
# 2. Get getUpsertStatement() implementation at line 1296
echo -e "\n=== MariaDB::getUpsertStatement() ==="
sed -n '1296,1350p' src/Database/Adapter/MariaDB.php
# 3. Check how columnList is formatted when passed to getInsertSuffix
echo -e "\n=== columnList assembly in SQL.php ==="
sed -n '2620,2640p' src/Database/Adapter/SQL.php
# 4. Search for _createdAt exclusion patterns in upsert
echo -e "\n=== _createdAt patterns in MariaDB ==="
rg -n '_createdAt' src/Database/Adapter/MariaDB.php -C2 | head -50Repository: utopia-php/database Length of output: 5517 🏁 Script executed: #!/bin/bash
# Search for test files that verify _createdAt behavior in upserts
echo "=== Test files checking _createdAt immutability ==="
fd -e '.php' tests | xargs grep -l '_createdAt' | head -10
# Check for specific upsert+_createdAt tests
echo -e "\n=== Tests with upsert and _createdAt ==="
rg -l 'upsert' tests --type=php | xargs rg '_createdAt' -C3 | head -100Repository: utopia-php/database Length of output: 156 🏁 Script executed: #!/bin/bash
# Find all callers of getUpsertStatement
echo "=== Callers of getUpsertStatement() ==="
rg -n 'getUpsertStatement' src/Database --type=php -B2 -A2
# Check if getInsertSuffix was recently added
echo -e "\n=== Recent additions to getInsertSuffix ==="
git log --oneline -p -- src/Database/Adapter/MariaDB.php | grep -A20 'getInsertSuffix' | head -60Repository: utopia-php/database Length of output: 3247 🏁 Script executed: #!/bin/bash
# Verify when batch inserts with OnDuplicate::Upsert are actually used
echo "=== OnDuplicate::Upsert usage ==="
rg -n 'OnDuplicate::Upsert' src/Database --type=php -B3 -A3 | head -100
# Check if there are high-level upsert methods in Database class
echo -e "\n=== Database class upsert methods ==="
rg -n 'def.*upsert|function.*upsert' src/Database/Database.php --type=php -iRepository: utopia-php/database Length of output: 2284
The new batch insert path via ON DUPLICATE KEY UPDATE `_createdAt` = VALUES(`_createdAt`), `_tenant` = VALUES(`_tenant`), ...Issues:
Align the skip list with 🛡️ Proposed fix protected function getInsertSuffix(string $table, array $columns = []): string
{
if ($this->onDuplicate !== OnDuplicate::Upsert || empty($columns)) {
return '';
}
$assignments = [];
foreach ($columns as $col) {
- // Skip the primary unique key (_uid) — no need to "update" what matched.
- // Safe to skip _id too since it's auto-increment in INSERT and untouched on update.
- if (\in_array($col, ['`_uid`', '`_id`'], true)) {
+ // Skip the conflict key (_uid), the surrogate PK (_id), the tenant
+ // discriminator (_tenant), and _createdAt — the original creation
+ // timestamp must be preserved on an upsert collision.
+ if (\in_array($col, ['`_uid`', '`_id`', '`_tenant`', '`_createdAt`'], true)) {
continue;
}
$assignments[] = "{$col} = VALUES({$col})";
}
if (empty($assignments)) {
return '';
}
return 'ON DUPLICATE KEY UPDATE ' . \implode(', ', $assignments);
}🧰 Tools🪛 PHPMD (2.15.0)[warning] 2398-2398: Avoid unused parameters such as '$table'. (undefined) (UnusedFormalParameter) 🤖 Prompt for AI Agents |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| use Utopia\Database\Exception\Timeout as TimeoutException; | ||
| use Utopia\Database\Exception\Transaction as TransactionException; | ||
| use Utopia\Database\Exception\Type as TypeException; | ||
| use Utopia\Database\OnDuplicate; | ||
| use Utopia\Database\Query; | ||
| use Utopia\Database\Validator\Authorization; | ||
| use Utopia\Mongo\Client; | ||
|
|
@@ -123,7 +124,8 @@ public function withTransaction(callable $callback): mixed | |
| } | ||
|
|
||
| // upsert + $setOnInsert hits WriteConflict (E112) under txn snapshot isolation. | ||
| if ($this->skipDuplicates) { | ||
| // Both Skip and Upsert modes use the no-transaction path. | ||
| if ($this->onDuplicate !== OnDuplicate::Fail) { | ||
| return $callback(); | ||
| } | ||
|
|
||
|
|
@@ -429,10 +431,12 @@ public function createCollection(string $name, array $attributes = [], array $in | |
| { | ||
| $id = $this->getNamespace() . '_' . $this->filter($name); | ||
|
|
||
| // In shared-tables mode or for metadata, the physical collection may | ||
| // already exist for another tenant. Return early to avoid a | ||
| // "Collection Exists" exception from the client. | ||
| if (!$this->inTransaction && ($this->getSharedTables() || $name === Database::METADATA) && $this->exists($this->getNamespace(), $name)) { | ||
| // In shared-tables mode or for metadata the physical collection may | ||
| // already exist. Return early to avoid "Collection Exists" from the | ||
| // client. | ||
| $tolerateExisting = $this->getSharedTables() || $name === Database::METADATA; | ||
|
|
||
| if (!$this->inTransaction && $tolerateExisting && $this->exists($this->getNamespace(), $name)) { | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -443,14 +447,15 @@ public function createCollection(string $name, array $attributes = [], array $in | |
| } catch (MongoException $e) { | ||
| $e = $this->processException($e); | ||
| if ($e instanceof DuplicateException) { | ||
| // Keep existing shared-tables/metadata behavior — no-op there. | ||
| return true; | ||
|
Comment on lines
449
to
451
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Preserve After 🐛 Proposed fix if ($e instanceof DuplicateException) {
// Also tolerate client-reported duplicates in Skip/Upsert mode.
if ($tolerateExisting) {
return true;
}
- // Keep existing shared-tables/metadata behavior — no-op there.
- return true;
+ throw $e;
}🤖 Prompt for AI Agents |
||
| } | ||
| // Client throws code-0 "Collection Exists" when its pre-check | ||
| // finds the collection. In shared-tables/metadata context this | ||
| // is a no-op; otherwise re-throw as DuplicateException so | ||
| // Database::createCollection() can run orphan reconciliation. | ||
| // finds the collection. Tolerated contexts no-op; otherwise re-throw | ||
| // as DuplicateException so Database::createCollection() can run | ||
| // orphan reconciliation. | ||
| if ($e->getCode() === 0 && stripos($e->getMessage(), 'Collection Exists') !== false) { | ||
| if ($this->getSharedTables() || $name === Database::METADATA) { | ||
| if ($tolerateExisting) { | ||
| return true; | ||
| } | ||
| throw new DuplicateException('Collection already exists', $e->getCode(), $e); | ||
|
|
@@ -1497,30 +1502,33 @@ public function createDocuments(Document $collection, array $documents): array | |
| $records[] = $record; | ||
| } | ||
|
|
||
| // insertMany aborts the txn on any duplicate; upsert + $setOnInsert no-ops instead. | ||
| if ($this->skipDuplicates) { | ||
| // insertMany aborts the txn on any duplicate; Mongo's upsert path handles | ||
| // both Skip ($setOnInsert: insert-only no-op) and Upsert ($set: overwrite). | ||
| if ($this->onDuplicate !== OnDuplicate::Fail) { | ||
| if (empty($records)) { | ||
| return []; | ||
| } | ||
|
|
||
| $operator = $this->onDuplicate === OnDuplicate::Upsert ? '$set' : '$setOnInsert'; | ||
|
|
||
| $operations = []; | ||
| foreach ($records as $record) { | ||
| $filter = ['_uid' => $record['_uid'] ?? '']; | ||
| if ($this->sharedTables) { | ||
| $filter['_tenant'] = $record['_tenant'] ?? $this->getTenant(); | ||
| } | ||
|
|
||
| // Filter fields can't reappear in $setOnInsert (mongo path-conflict error). | ||
| $setOnInsert = $record; | ||
| unset($setOnInsert['_uid'], $setOnInsert['_tenant']); | ||
| // Filter fields can't reappear in $setOnInsert/$set (mongo path-conflict error). | ||
| $payload = $record; | ||
| unset($payload['_uid'], $payload['_tenant']); | ||
|
Comment on lines
+1521
to
+1523
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Every call to The $payload = $record;
unset($payload['_uid'], $payload['_tenant']);
$idValue = $payload['_id'] ?? null;
unset($payload['_id']);
$update = [$operator => $payload];
if ($operator === '$set' && $idValue !== null) {
$update['$setOnInsert'] = ['_id' => $idValue];
}
$operations[] = [
'filter' => $filter,
'update' => $update,
]; |
||
|
|
||
| if (empty($setOnInsert)) { | ||
| if (empty($payload)) { | ||
| continue; | ||
| } | ||
|
|
||
| $operations[] = [ | ||
| 'filter' => $filter, | ||
| 'update' => ['$setOnInsert' => $setOnInsert], | ||
| 'update' => [$operator => $payload], | ||
|
Comment on lines
+1521
to
+1531
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: In MongoDB, the _id field is immutable and cannot be modified once a document exists. Attempting to use $set on _id in an update operation (with or without upsert) will fail with an error like "Performing an update on the path '_id' would modify the immutable field '_id'". For upsert operations (upsert: true), the _id is determined by the query filter if it specifies _id. For example: db.collection.updateOne( { _id: ObjectId("...") }, { $set: { field: "value" } }, { upsert: true } ) If no document matches, MongoDB inserts a new document using the _id from the query filter, applying the update operators (like $set). $setOnInsert operators are also applied only on insert. Do not include _id in $set or $setOnInsert in the update document, as MongoDB will reject attempts to set _id via update operators even with $setOnInsert (historical behavior confirmed in sources, fixed in later versions but still not recommended). Instead, specify _id in the query filter for controlled upserts. Example of correct upsert without setting _id in update: db.collection.updateOne( { _id: 123 }, { $set: { status: "active" }, $setOnInsert: { createdAt: new Date } }, { upsert: true } ) This sets _id: 123 on insert from the query, applies both $set and $setOnInsert on insert, or only $set on update. If the query doesn't specify _id, MongoDB auto-generates ObjectId for _id on insert. Citations:
🏁 Script executed: # Find and examine the upsert operation context in Mongo.php around lines 1527-1537
cd src/Database/Adapter && head -1560 Mongo.php | tail -50Repository: utopia-php/database Length of output: 1846 🏁 Script executed: # Get broader context of the upsert method to understand filter and payload construction
cd src/Database/Adapter && sed -n '1480,1560p' Mongo.phpRepository: utopia-php/database Length of output: 2845 Strip In 🔧 Suggested approachInclude $filter = ['_uid' => $record['_uid'] ?? ''];
+ if (\array_key_exists('_id', $record)) {
+ $filter['_id'] = $record['_id'];
+ }
if ($this->sharedTables) {
$filter['_tenant'] = $record['_tenant'] ?? $this->getTenant();
}
// Filter fields can't reappear in $setOnInsert/$set (mongo path-conflict error).
$payload = $record;
- unset($payload['_uid'], $payload['_tenant']);
+ unset($payload['_uid'], $payload['_tenant'], $payload['_id']);This way, 🤖 Prompt for AI Agents |
||
| ]; | ||
| } | ||
|
|
||
|
|
@@ -3590,8 +3598,8 @@ protected function processException(\Throwable $e): \Throwable | |
| return new DuplicateException('Collection already exists', $e->getCode(), $e); | ||
| } | ||
|
|
||
| // Index already exists | ||
| if ($e->getCode() === 85) { | ||
| // Index already exists (85 = IndexOptionsConflict, 86 = IndexKeySpecsConflict) | ||
| if ($e->getCode() === 85 || $e->getCode() === 86) { | ||
| return new DuplicateException('Index already exists', $e->getCode(), $e); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| use Utopia\Database\Exception\Transaction as TransactionException; | ||
| use Utopia\Database\Exception\Truncate as TruncateException; | ||
| use Utopia\Database\Helpers\ID; | ||
| use Utopia\Database\OnDuplicate; | ||
| use Utopia\Database\Operator; | ||
| use Utopia\Database\Query; | ||
|
|
||
|
|
@@ -241,6 +242,7 @@ public function createCollection(string $name, array $attributes = [], array $in | |
| } | ||
|
|
||
| $sqlTenant = $this->sharedTables ? '_tenant INTEGER DEFAULT NULL,' : ''; | ||
|
|
||
| $collection = " | ||
| CREATE TABLE {$this->getSQLTable($id)} ( | ||
| _id BIGINT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, | ||
|
|
@@ -291,19 +293,19 @@ public function createCollection(string $name, array $attributes = [], array $in | |
| $uniquePermissionIndex = $this->getShortKey("{$namespace}_{$this->tenant}_{$id}_ukey"); | ||
| $permissionIndex = $this->getShortKey("{$namespace}_{$this->tenant}_{$id}_permission"); | ||
| $permissions .= " | ||
| CREATE UNIQUE INDEX \"{$uniquePermissionIndex}\" | ||
| CREATE UNIQUE INDEX \"{$uniquePermissionIndex}\" | ||
| ON {$this->getSQLTable($id . '_perms')} USING btree (_tenant,_document,_type,_permission); | ||
| CREATE INDEX \"{$permissionIndex}\" | ||
| ON {$this->getSQLTable($id . '_perms')} USING btree (_tenant,_permission,_type); | ||
| CREATE INDEX \"{$permissionIndex}\" | ||
| ON {$this->getSQLTable($id . '_perms')} USING btree (_tenant,_permission,_type); | ||
| "; | ||
| } else { | ||
| $uniquePermissionIndex = $this->getShortKey("{$namespace}_{$id}_ukey"); | ||
| $permissionIndex = $this->getShortKey("{$namespace}_{$id}_permission"); | ||
| $permissions .= " | ||
| CREATE UNIQUE INDEX \"{$uniquePermissionIndex}\" | ||
| CREATE UNIQUE INDEX \"{$uniquePermissionIndex}\" | ||
| ON {$this->getSQLTable($id . '_perms')} USING btree (_document COLLATE utf8_ci_ai,_type,_permission); | ||
| CREATE INDEX \"{$permissionIndex}\" | ||
| ON {$this->getSQLTable($id . '_perms')} USING btree (_permission,_type); | ||
| CREATE INDEX \"{$permissionIndex}\" | ||
| ON {$this->getSQLTable($id . '_perms')} USING btree (_permission,_type); | ||
| "; | ||
| } | ||
|
|
||
|
|
@@ -2352,23 +2354,55 @@ public function getSupportForOptionalSpatialAttributeWithExistingRows(): bool | |
|
|
||
| protected function getInsertKeyword(): string | ||
| { | ||
| // Postgres doesn't have INSERT IGNORE — Skip/Upsert are realized via | ||
| // the ON CONFLICT suffix (see getInsertSuffix). | ||
| return 'INSERT INTO'; | ||
| } | ||
|
|
||
| protected function getInsertSuffix(string $table): string | ||
| /** | ||
| * Postgres permissions insert uses ON CONFLICT DO NOTHING via suffix; the | ||
| * keyword stays plain INSERT INTO. Override MariaDB's INSERT IGNORE (no-op | ||
| * in MySQL dialect, a syntax error in Postgres). | ||
| */ | ||
| protected function getInsertPermissionsKeyword(): string | ||
| { | ||
| return 'INSERT INTO'; | ||
| } | ||
|
|
||
| /** | ||
| * @param array<string> $columns | ||
| */ | ||
| protected function getInsertSuffix(string $table, array $columns = []): string | ||
| { | ||
| if (!$this->skipDuplicates) { | ||
| if ($this->onDuplicate === OnDuplicate::Fail) { | ||
| return ''; | ||
| } | ||
|
|
||
| $conflictTarget = $this->sharedTables ? '("_uid", "_tenant")' : '("_uid")'; | ||
|
|
||
| return "ON CONFLICT {$conflictTarget} DO NOTHING"; | ||
| if ($this->onDuplicate === OnDuplicate::Skip) { | ||
| return "ON CONFLICT {$conflictTarget} DO NOTHING"; | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| // Upsert: DO UPDATE SET col = EXCLUDED.col for every column except the conflict key. | ||
| $assignments = []; | ||
| foreach ($columns as $col) { | ||
| if (\in_array($col, ['"_uid"', '"_id"', '"_tenant"'], true)) { | ||
| continue; | ||
| } | ||
| $assignments[] = "{$col} = EXCLUDED.{$col}"; | ||
| } | ||
|
|
||
| if (empty($assignments)) { | ||
| return "ON CONFLICT {$conflictTarget} DO NOTHING"; | ||
| } | ||
|
|
||
| return "ON CONFLICT {$conflictTarget} DO UPDATE SET " . \implode(', ', $assignments); | ||
| } | ||
|
Comment on lines
+2387
to
2401
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Confirm how SQL::createDocuments passes the columns list into getInsertSuffix
# (expecting quoted identifiers like `"_uid"` to match the exclusion set),
# and check how other adapters (MariaDB) exclude _createdAt on Upsert.
rg -nP -C5 'getInsertSuffix\s*\(' --type=php
rg -nP -C3 "'_createdAt'|\"_createdAt\"" src/Database/Adapter --type=phpRepository: utopia-php/database Length of output: 19136 🏁 Script executed: # Extract the full getInsertSuffix() implementation in Postgres.php
sed -n '2375,2410p' src/Database/Adapter/Postgres.php
# Check MariaDB's implementation for comparison
sed -n '2387,2420p' src/Database/Adapter/MariaDB.php
# Search for getInsertPermissionsSuffix() to understand permissions handling
rg -nP 'getInsertPermissionsSuffix' --type=php -A5Repository: utopia-php/database Length of output: 3171 🏁 Script executed: # Get full getInsertPermissionsSuffix() implementations
sed -n '2403,2430p' src/Database/Adapter/Postgres.php
echo "---"
rg -nP 'getInsertPermissionsSuffix' src/Database/Adapter/MariaDB.php -A15
# Check how columns list is built in createDocuments (search for where $columnList is created)
sed -n '2550,2600p' src/Database/Adapter/SQL.php | head -60
# Verify what columns are included in the upsert
rg -nP '\$columnList' src/Database/Adapter/SQL.php -B2 -A2Repository: utopia-php/database Length of output: 3518 Upsert incorrectly updates In Additionally, the 🛠 Proposed fix- // Upsert: DO UPDATE SET col = EXCLUDED.col for every column except the conflict key.
+ // Upsert: DO UPDATE SET col = EXCLUDED.col for every column except the
+ // conflict/identity/immutable columns.
$assignments = [];
foreach ($columns as $col) {
- if (\in_array($col, ['"_uid"', '"_id"', '"_tenant"'], true)) {
+ if (\in_array($col, ['"_uid"', '"_id"', '"_tenant"', '"_createdAt"'], true)) {
continue;
}
$assignments[] = "{$col} = EXCLUDED.{$col}";
}🤖 Prompt for AI Agents |
||
|
|
||
| protected function getInsertPermissionsSuffix(): string | ||
| { | ||
| if (!$this->skipDuplicates) { | ||
| if ($this->onDuplicate === OnDuplicate::Fail) { | ||
| return ''; | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.