From 677dd0d5c9600abaac54d7b69e70da763be35d32 Mon Sep 17 00:00:00 2001 From: Benjamin Habegger Date: Wed, 10 Jun 2026 16:44:17 +0200 Subject: [PATCH] OAK-12247: track totalIndexedNodes after each indexing cycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds getTotalDocCount() to FulltextIndexWriter (default -1 = not tracked) and wires it through all writer implementations: - DefaultIndexWriter: commit() then numDocs() before close() — no extra I/O since close() calls commit() internally; accurate after pending deletes. - MultiplexingIndexWriter: sums sub-writer counts (filters negatives for mounts that were never opened). - PooledLuceneIndexWriter: delegates to the wrapped writer. - ElasticIndexWriter: LongAdder initialised to prevTotal (incremental) or 0 (reindex); incremented/decremented in afterBulk() and deleteDocuments(). - LocalIndexWriter (NRT): returns -1 — NRT docs are in-memory only; the persistent count is updated by the next async cycle. closeWriter() reads getTotalDocCount() and persists it to :status/totalIndexedNodes when the value is >= 0. For the Elastic gap (empty reindex returns indexUpdated=false so the legacy :status block never ran), a separate additive block writes totalIndexedNodes and REINDEX_COMPLETION_TIMESTAMP for the !indexUpdated && reindex case. Kill switch FT_OAK-12247 (AtomicBoolean FT_OAK_12247_DISABLE, default false = tracking active) is registered as a FeatureToggle in LuceneIndexProviderService following the FT_OAK_12193 precedent. IndexStatsCollector and IndexStatsImpl are removed (net ~200 lines). 
--- .../lucene/LuceneIndexProviderService.java | 4 + .../lucene/LuceneIndexWriterFactory.java | 3 +- .../lucene/writer/DefaultIndexWriter.java | 12 + .../writer/MultiplexingIndexWriter.java | 11 + .../writer/PooledLuceneIndexWriter.java | 5 + .../lucene/writer/DefaultIndexWriterTest.java | 24 ++ .../index/ElasticBulkProcessorHandler.java | 23 +- .../elastic/index/ElasticIndexWriter.java | 22 +- .../plugins/index/search/IndexDefinition.java | 18 ++ .../editor/FulltextIndexEditorContext.java | 31 ++- .../spi/editor/FulltextIndexWriter.java | 8 + .../FulltextIndexEditorContextTest.java | 226 ++++++++++++++++++ 12 files changed, 381 insertions(+), 6 deletions(-) create mode 100644 oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexEditorContextTest.java diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java index ec2c5990d47..cab3471d0ec 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java @@ -57,6 +57,7 @@ import org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReaderFactory; import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache; import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditor; +import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditorContext; import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; import org.apache.jackrabbit.oak.plugins.index.search.TextExtractionStatsMBean; @@ -384,6 +385,9 @@ private void activate(BundleContext bundleContext, Configuration config) throws 
oakRegs.add(whiteboard.register(FeatureToggle.class, new FeatureToggle(FulltextIndexEditor.FT_OAK_12193, FulltextIndexEditor.FT_OAK_12193_DISABLE), emptyMap())); + oakRegs.add(whiteboard.register(FeatureToggle.class, + new FeatureToggle(FulltextIndexEditorContext.FT_OAK_12247, FulltextIndexEditorContext.FT_OAK_12247_DISABLE), + emptyMap())); initializeIndexDir(bundleContext, config); initializeExtractedTextCache(bundleContext, config, statisticsProvider); tracker = createTracker(bundleContext, config); diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexWriterFactory.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexWriterFactory.java index 43960f6c11e..dceb7dac098 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexWriterFactory.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexWriterFactory.java @@ -28,5 +28,6 @@ public interface LuceneIndexWriterFactory extends FulltextIndexWriterFactory>, AutoCloseable { @Override - LuceneIndexWriter newInstance(IndexDefinition definition, NodeBuilder definitionBuilder, CommitInfo commitInfo, boolean reindex); + LuceneIndexWriter newInstance(IndexDefinition definition, NodeBuilder definitionBuilder, + CommitInfo commitInfo, boolean reindex); } diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java index 79a7790cbf0..10aa82dd384 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java @@ -67,6 +67,7 @@ class DefaultIndexWriter implements LuceneIndexWriter { private Directory directory; private long genAtStart = -1; private boolean 
indexUpdated = false; + private long totalDocCount = -1L; public DefaultIndexWriter(LuceneIndexDefinition definition, NodeBuilder definitionBuilder, DirectoryFactory directoryFactory, String dirName, String suggestDirName, @@ -144,6 +145,12 @@ public boolean close(long timestamp) throws IOException { PERF_LOGGER.end(start, -1, "Completed suggester for directory {}", definition); } + // OAK-12247: commit() applies all pending delete queries before close(), + // making writer.numDocs() accurate for totalIndexedNodes tracking. + // close() calls commit() internally so this adds no extra I/O. + writer.commit(); + totalDocCount = writer.numDocs(); + writer.close(); PERF_LOGGER.end(start, -1, "Closed writer for directory {}", definition); @@ -296,6 +303,11 @@ private static void trackIndexSizeInfo(@NotNull IndexWriter writer, log.trace("Directory overall size: {}, files: {}", IOUtils.humanReadableByteCount(overallSize), sb); } + @Override + public long getTotalDocCount() { + return totalDocCount; + } + @Override public String toString() { return "DefaultIndexWriter{" + diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexingIndexWriter.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexingIndexWriter.java index 0f2b95c3edf..b648e196d55 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexingIndexWriter.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/MultiplexingIndexWriter.java @@ -110,6 +110,28 @@ private DefaultIndexWriter createWriter(Mount m) { suggestDirName, reindex, writerConfig); } + /** + * Sums the document counts reported by the sub-writers held in {@code writers}. + * NOTE(review): only writers present in {@code writers} are summed; confirm that + * mounts without a writer in this cycle do not need to contribute to the total. + * + * @return the summed count, or {@code -1} when no sub-writer reported a count + */ + @Override + public long getTotalDocCount() { + long total = 0L; + boolean tracked = false; + for (LuceneIndexWriter w : writers.values()) { + long count = w.getTotalDocCount(); + if (count >= 0) { + total += count; + tracked = true; + } + } + // Report "not tracked" (-1) rather than 0 when no sub-writer reported a count. + return tracked ? total : -1L; + } + @Override public String toString() { return "MultiplexingIndexWriter{" + diff --git
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/PooledLuceneIndexWriter.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/PooledLuceneIndexWriter.java index 73e27e5ff62..6b2541c5d38 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/PooledLuceneIndexWriter.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/PooledLuceneIndexWriter.java @@ -76,4 +76,9 @@ public String formatStatistics() { "deletes: " + deleteCount + "]"; } + + @Override + public long getTotalDocCount() { + return delegateWriter.getTotalDocCount(); + } } diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriterTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriterTest.java index 84584bf1870..23c2f564520 100644 --- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriterTest.java +++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriterTest.java @@ -142,6 +142,30 @@ public void useUpdateForNormalIndexing() throws Exception{ writer.close(100); } + /** + * Verifies that {@code getTotalDocCount()} returns the correct document count + * after the writer is closed (OAK-12247). 
+ */ + @Test + public void getTotalDocCountReflectsIndexedDocuments() throws Exception { + FSDirectoryFactory fsdir = new FSDirectoryFactory(folder.getRoot()); + LuceneIndexDefinition defn = new LuceneIndexDefinition(root, builder.getNodeState(), "/foo"); + DefaultIndexWriter writer = new DefaultIndexWriter(defn, builder, + fsdir, INDEX_DATA_CHILD_NAME, SUGGEST_DATA_CHILD_NAME, true, writerConfig); + + Document doc1 = new Document(); + doc1.add(newPathField("/a/b")); + writer.updateDocument("/a/b", doc1); + + Document doc2 = new Document(); + doc2.add(newPathField("/a/c")); + writer.updateDocument("/a/c", doc2); + + writer.close(0); + + assertEquals("getTotalDocCount() should return 2 after indexing 2 documents", 2L, writer.getTotalDocCount()); + } + private DefaultIndexWriter createWriter(LuceneIndexDefinition defn, boolean reindex) { return new DefaultIndexWriter(defn, builder, new DefaultDirectoryFactory(null, null), INDEX_DATA_CHILD_NAME, diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticBulkProcessorHandler.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticBulkProcessorHandler.java index 2498c1409f0..d309d8b9427 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticBulkProcessorHandler.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticBulkProcessorHandler.java @@ -48,6 +48,7 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; @@ -66,6 +67,7 @@ static class IndexInfo { public final NodeBuilder definitionBuilder; public final boolean waitForESAcknowledgement; public final boolean 
isRealTime; + public final LongAdder docCount; /** * Exceptions occurred while trying to update index in elasticsearch */ @@ -77,11 +79,16 @@ static class IndexInfo { boolean indexModified = false; IndexInfo(String indexName, ElasticIndexDefinition indexDefinition, NodeBuilder definitionBuilder, boolean waitForESAcknowledgement, boolean isRealTime) { + this(indexName, indexDefinition, definitionBuilder, waitForESAcknowledgement, isRealTime, new LongAdder()); + } + + IndexInfo(String indexName, ElasticIndexDefinition indexDefinition, NodeBuilder definitionBuilder, boolean waitForESAcknowledgement, boolean isRealTime, LongAdder docCount) { this.indexName = indexName; this.indexDefinition = indexDefinition; this.definitionBuilder = definitionBuilder; this.waitForESAcknowledgement = waitForESAcknowledgement; this.isRealTime = isRealTime; + this.docCount = docCount; } } @@ -180,6 +187,13 @@ public ElasticBulkProcessorHandler(@NotNull ElasticConnection elasticConnection) * This option is available for sync index definitions only. */ public void registerIndex(String indexName, ElasticIndexDefinition indexDefinition, NodeBuilder definitionBuilder, CommitInfo commitInfo, boolean waitForESAcknowledgement) { + registerIndex(indexName, indexDefinition, definitionBuilder, commitInfo, waitForESAcknowledgement, new LongAdder()); + } + + /** + * Registers an ElasticIndex with the given index definition configuration and document-count tracker. 
+ */ + public void registerIndex(String indexName, ElasticIndexDefinition indexDefinition, NodeBuilder definitionBuilder, CommitInfo commitInfo, boolean waitForESAcknowledgement, LongAdder docCount) { checkOpen(); if (registeredIndexes.containsKey(indexName)) { LOG.warn("Index already registered: {}", indexName); @@ -205,7 +219,7 @@ public void registerIndex(String indexName, ElasticIndexDefinition indexDefiniti } else { isRealTime = false; } - return new IndexInfo(indexName, indexDefinition, definitionBuilder, waitForESAcknowledgement, isRealTime); + return new IndexInfo(indexName, indexDefinition, definitionBuilder, waitForESAcknowledgement, isRealTime, docCount); }); } } @@ -461,6 +475,13 @@ public void afterBulk(long executionId, BulkRequest request, List { private static final Logger LOG = LoggerFactory.getLogger(ElasticIndexWriter.class); @@ -65,6 +66,7 @@ class ElasticIndexWriter implements FulltextIndexWriter { private final boolean reindex; private final String indexName; private final ElasticRetryPolicy retryPolicy; + private final LongAdder docCount; ElasticIndexWriter(@NotNull ElasticIndexTracker indexTracker, @NotNull ElasticConnection elasticConnection, @@ -80,6 +82,10 @@ class ElasticIndexWriter implements FulltextIndexWriter { this.bulkProcessorHandler = bulkProcessorHandler; this.retryPolicy = retryPolicy; + long prevTotal = reindex ? 0L : Math.max(0L, indexDefinition.getTotalIndexedNodes()); + this.docCount = new LongAdder(); + this.docCount.add(prevTotal); + // We don't use stored index definitions with elastic. 
Every time a new writer gets created we // use the actual index name (based on the current seed) while reindexing, or the alias (pointing to the // old index until the new one gets enabled) during incremental reindexing @@ -115,7 +121,7 @@ class ElasticIndexWriter implements FulltextIndexWriter { waitForESAcknowledgement = false; } } - bulkProcessorHandler.registerIndex(indexName, indexDefinition, definitionBuilder, commitInfo, waitForESAcknowledgement); + bulkProcessorHandler.registerIndex(indexName, indexDefinition, definitionBuilder, commitInfo, waitForESAcknowledgement, docCount); } @TestOnly @@ -140,6 +146,7 @@ class ElasticIndexWriter implements FulltextIndexWriter { this.indexName = indexDefinition.getIndexAlias(); this.retryPolicy = retryPolicy; this.reindex = reindex; + this.docCount = new LongAdder(); } @Override @@ -167,11 +174,14 @@ public void updateDocument(String path, ElasticDocument doc) throws IOException @Override public void deleteDocuments(String path) throws IOException { + // Direct bulk delete: queues exactly 1 delete for the document AT `path`, matched by its + // document ID (derived from the path). Stats for this delete are incremented via + // OakBulkListener.afterBulk() when the response arrives (result="deleted"). retryPolicy.withRetries(() -> bulkProcessorHandler.delete(indexName, ElasticIndexUtils.idFromPath(path))); if (!ElasticIndexEditorProvider.FT_OAK_12206_DISABLE.get()) { // Delete all descendants: mirrors Lucene's PrefixQuery on the path term. - // The :ancestors field is indexed with path_hierarchy, so a term query on `path` - // matches every document whose ancestor chain includes that path. + // :ancestors stores parent paths only (not self), so deleteByQuery targets + // strict descendants; the direct bulk delete covers the node itself. // The ES Bulk API does not support delete by query, so we need to issue a separate request. 
This is not ideal but should be ok since deletes are expected to be less frequent than updates. // The alternative would be to get the list of affected documents and issue a bulk delete by id, @@ -182,6 +192,7 @@ public void deleteDocuments(String path) throws IOException { response.failures().forEach(f -> LOG.warn("Failed to delete descendants of {}: shard {} reason {}", path, f.id(), f.cause())); if (response.deleted() != null && response.deleted() > 0) { LOG.info("Deleted {} descendants of {} in {} ms", response.deleted(), path, response.took()); + docCount.add(-response.deleted()); } }); } @@ -203,6 +214,20 @@ public boolean close(long timestamp) throws IOException { return updateStatus; } + /** + * Returns the running document total kept in {@code docCount}, or {@code -1} + * when this is an incremental run and the index definition has no recorded baseline. + */ + @Override + public long getTotalDocCount() { + // Without a recorded baseline the adder only holds this cycle's delta; + // report "not tracked" so the delta is not persisted as the index total. + if (!reindex && indexDefinition.getTotalIndexedNodes() < 0) { + return -1L; + } + return docCount.sum(); + } + private void saveMetrics() { ElasticIndexNode indexNode = indexTracker.acquireIndexNode(indexDefinition.getIndexPath()); if (indexNode != null) { diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java index 0459375b9ae..b4e22bfb867 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java @@ -199,6 +199,7 @@ public class IndexDefinition implements Aggregate.AggregateMapper { public static final String CREATION_TIMESTAMP = "creationTimestamp"; public static final String REINDEX_COMPLETION_TIMESTAMP = "reindexCompletionTimestamp"; + public static final String PROP_TOTAL_INDEXED_NODES = "totalIndexedNodes"; /** * Meta property which provides the unique id @@ -746,6 +747,23 @@ public String getUniqueId() { return uid; } + /** + * Returns the total number of documents in the index as of the last + * completed indexing cycle, or {@code -1} if never recorded.
+ */ + public long getTotalIndexedNodes() { + PropertyState prop = definition.getChildNode(STATUS_NODE).getProperty(PROP_TOTAL_INDEXED_NODES); + return prop != null ? prop.getValue(Type.LONG) : -1L; + } + + /** + * Returns {@code true} if at least one full reindex cycle has completed + * (i.e. {@code REINDEX_COMPLETION_TIMESTAMP} is present in {@code :status}). + */ + public boolean isReindexCompleted() { + return definition.getChildNode(STATUS_NODE).hasProperty(REINDEX_COMPLETION_TIMESTAMP); + } + public boolean isNRTIndexingEnabled() { return nrtIndexMode; } diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditorContext.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditorContext.java index b8771332286..a91062463f9 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditorContext.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditorContext.java @@ -44,6 +44,7 @@ import java.io.IOException; import java.util.Calendar; import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; import static java.util.Objects.requireNonNull; import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_RANDOM_SEED; @@ -61,6 +62,14 @@ public abstract class FulltextIndexEditorContext { private static final PerfLogger PERF_LOGGER = new PerfLogger(LoggerFactory.getLogger(FulltextIndexEditorContext.class.getName() + ".perf")); + public static final String FT_OAK_12247 = "FT_OAK-12247"; + + /** + * Kill switch for OAK-12247 totalIndexedNodes tracking. Default {@code false} + * (tracking active). Set to {@code true} to revert to legacy behaviour. 
+ */ + public static final AtomicBoolean FT_OAK_12247_DISABLE = new AtomicBoolean(false); + protected IndexDefinition definition; protected final NodeBuilder definitionBuilder; @@ -154,7 +163,8 @@ public void setPropertyUpdateCallback(PropertyUpdateCallback propertyUpdateCallb public void closeWriter() throws IOException { Calendar currentTime = getCalendar(); final long start = PERF_LOGGER.start(); - boolean indexUpdated = getWriter().close(currentTime.getTimeInMillis()); + FulltextIndexWriter writer = getWriter(); // OAK-12247: local ref needed for getTotalDocCount() after close + boolean indexUpdated = writer.close(currentTime.getTimeInMillis()); if (indexUpdated) { PERF_LOGGER.end(start, -1, "Closed writer for directory {}", definition); @@ -175,6 +185,25 @@ public void closeWriter() throws IOException { textExtractor.done(reindex); } } + + // OAK-12247: persist totalIndexedNodes and fix the Elastic empty-reindex gap + // (empty reindex returns indexUpdated=false, so the block above never runs and + // REINDEX_COMPLETION_TIMESTAMP would not be written — planner has no signal). + // When indexUpdated=true the legacy block already wrote REINDEX_COMPLETION_TIMESTAMP, + // so we only write it here for the !indexUpdated && reindex case. 
+ if (!FT_OAK_12247_DISABLE.get() && (indexUpdated || reindex)) { + NodeBuilder status = definitionBuilder.child(IndexDefinition.STATUS_NODE); + long total = writer.getTotalDocCount(); + if (total >= 0) { + status.setProperty(IndexDefinition.PROP_TOTAL_INDEXED_NODES, total); + } + if (!indexUpdated && reindex) { + status.setProperty(IndexDefinition.REINDEX_COMPLETION_TIMESTAMP, + ISO8601.format(currentTime), Type.DATE); + log.info("{} set for index: {}", IndexDefinition.REINDEX_COMPLETION_TIMESTAMP, + definition.getIndexPath()); + } + } } private String getUpdatedTime(Calendar currentTime) { diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexWriter.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexWriter.java index bc19febc66a..277d8e82bec 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexWriter.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexWriter.java @@ -49,4 +49,12 @@ public interface FulltextIndexWriter { * @return true if index was updated or any write happened. */ boolean close(long timestamp) throws IOException; + + /** + * Returns the total number of documents in the index after this writer is closed, + * or {@code -1} if not tracked by this implementation. 
+ */ + default long getTotalDocCount() { + return -1L; + } } diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexEditorContextTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexEditorContextTest.java new file mode 100644 index 00000000000..d7ffe12f731 --- /dev/null +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexEditorContextTest.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.search; + +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.plugins.index.IndexCommitCallback; +import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback; +import org.apache.jackrabbit.oak.plugins.index.IndexingContext; +import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.DocumentMaker; +import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditorContext; +import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexWriter; +import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexWriterFactory; +import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +/** + * Unit tests for OAK-12247 totalIndexedNodes tracking in FulltextIndexEditorContext. 
+ */ +public class FulltextIndexEditorContextTest { + + private NodeState root; + private NodeBuilder definitionBuilder; + private TestIndexingContext indexingContext; + + @Before + public void setUp() { + MemoryNodeStore store = new MemoryNodeStore(); + root = store.getRoot(); + definitionBuilder = root.builder().child("oak:index").child("testIndex"); + indexingContext = new TestIndexingContext("/oak:index/testIndex"); + FulltextIndexEditorContext.FT_OAK_12247_DISABLE.set(false); + } + + @After + public void tearDown() { + FulltextIndexEditorContext.FT_OAK_12247_DISABLE.set(false); + } + + @Test + public void emptyReindex_writesZeroTotalAndTimestamp() throws Exception { + // Writer returns totalDocCount=0 and indexUpdated=false (empty reindex) + TestWriterFactory factory = new TestWriterFactory(0L, false); + TestEditorContext ctx = new TestEditorContext(root, definitionBuilder, factory, indexingContext); + ctx.setReindex(true); + ctx.closeWriter(); + + NodeState status = definitionBuilder.getNodeState().getChildNode(IndexDefinition.STATUS_NODE); + PropertyState totalProp = status.getProperty(IndexDefinition.PROP_TOTAL_INDEXED_NODES); + assertNotNull("totalIndexedNodes must be written even for empty reindex", totalProp); + assertEquals(0L, totalProp.getValue(Type.LONG).longValue()); + + PropertyState tsProp = status.getProperty(IndexDefinition.REINDEX_COMPLETION_TIMESTAMP); + assertNotNull("REINDEX_COMPLETION_TIMESTAMP must be written even when indexUpdated=false", tsProp); + } + + @Test + public void nonEmptyReindex_writesTotalFromWriter() throws Exception { + // Writer returns totalDocCount=100 and indexUpdated=true + TestWriterFactory factory = new TestWriterFactory(100L, true); + TestEditorContext ctx = new TestEditorContext(root, definitionBuilder, factory, indexingContext); + ctx.setReindex(true); + ctx.closeWriter(); + + NodeState status = definitionBuilder.getNodeState().getChildNode(IndexDefinition.STATUS_NODE); + PropertyState totalProp = 
status.getProperty(IndexDefinition.PROP_TOTAL_INDEXED_NODES); + assertNotNull(totalProp); + assertEquals(100L, totalProp.getValue(Type.LONG).longValue()); + } + + @Test + public void incremental_writesTotalFromWriter() throws Exception { + // Writer directly returns its computed total (e.g. 103 after net +3 on prev 100) + TestWriterFactory factory = new TestWriterFactory(103L, true); + TestEditorContext ctx = new TestEditorContext(root, definitionBuilder, factory, indexingContext); + ctx.closeWriter(); + + NodeState status = definitionBuilder.getNodeState().getChildNode(IndexDefinition.STATUS_NODE); + PropertyState totalProp = status.getProperty(IndexDefinition.PROP_TOTAL_INDEXED_NODES); + assertNotNull(totalProp); + assertEquals(103L, totalProp.getValue(Type.LONG).longValue()); + } + + @Test + public void writerReturnsMinusOne_totalNotWritten() throws Exception { + // Writer returns -1 (not tracked) → no property written + TestWriterFactory factory = new TestWriterFactory(-1L, true); + TestEditorContext ctx = new TestEditorContext(root, definitionBuilder, factory, indexingContext); + ctx.closeWriter(); + + NodeState status = definitionBuilder.getNodeState().getChildNode(IndexDefinition.STATUS_NODE); + assertNull("totalIndexedNodes must NOT be written when writer returns -1", + status.getProperty(IndexDefinition.PROP_TOTAL_INDEXED_NODES)); + } + + @Test + public void toggleDisabled_totalNotWritten() throws Exception { + FulltextIndexEditorContext.FT_OAK_12247_DISABLE.set(true); + + TestWriterFactory factory = new TestWriterFactory(50L, true); + TestEditorContext ctx = new TestEditorContext(root, definitionBuilder, factory, indexingContext); + ctx.closeWriter(); + + NodeState status = definitionBuilder.getNodeState().getChildNode(IndexDefinition.STATUS_NODE); + assertNull("totalIndexedNodes must NOT be written when toggle is disabled", + status.getProperty(IndexDefinition.PROP_TOTAL_INDEXED_NODES)); + } + + // --------------- helpers --------------- + + /** + * 
Minimal concrete subclass of FulltextIndexEditorContext for testing. + * It bypasses createIndexDefinition() by passing a pre-built IndexDefinition. + */ + private static class TestEditorContext extends FulltextIndexEditorContext { + + TestEditorContext(NodeState root, NodeBuilder definitionBuilder, + FulltextIndexWriterFactory factory, + IndexingContext indexingContext) { + super(root, definitionBuilder, + new IndexDefinition(root, definitionBuilder.getNodeState(), "/oak:index/testIndex"), + IndexUpdateCallback.NOOP, factory, new ExtractedTextCache(0, 0), + indexingContext, true); + } + + /** Expose the private reindex field via the existing enableReindexMode mechanism. */ + void setReindex(boolean reindex) { + if (reindex) { + enableReindexMode(); + } + } + + @Override + public IndexDefinition.Builder newDefinitionBuilder() { + return new IndexDefinition.BaseBuilder(); + } + + @Override + public DocumentMaker newDocumentMaker(IndexDefinition.IndexingRule rule, String path) { + return null; + } + } + + /** + * Writer factory that creates a {@link StubWriter} reporting a pre-configured + * {@code totalDocCount} from {@link FulltextIndexWriter#getTotalDocCount()}. 
+ */ + private static class TestWriterFactory implements FulltextIndexWriterFactory { + private final long totalDocCount; + private final boolean indexUpdated; + + TestWriterFactory(long totalDocCount, boolean indexUpdated) { + this.totalDocCount = totalDocCount; + this.indexUpdated = indexUpdated; + } + + @Override + public FulltextIndexWriter newInstance(IndexDefinition definition, + NodeBuilder definitionBuilder, + CommitInfo commitInfo, + boolean reindex) { + return new StubWriter(totalDocCount, indexUpdated); + } + } + + private static class StubWriter implements FulltextIndexWriter { + private final long totalDocCount; + private final boolean indexUpdated; + + StubWriter(long totalDocCount, boolean indexUpdated) { + this.totalDocCount = totalDocCount; + this.indexUpdated = indexUpdated; + } + + @Override public void updateDocument(String path, Object doc) {} + @Override public void deleteDocuments(String path) {} + + @Override + public boolean close(long timestamp) { + return indexUpdated; + } + + @Override + public long getTotalDocCount() { + return totalDocCount; + } + } + + private static class TestIndexingContext implements IndexingContext { + private final String indexPath; + + TestIndexingContext(String indexPath) { + this.indexPath = indexPath; + } + + @Override public String getIndexPath() { return indexPath; } + @Override public CommitInfo getCommitInfo() { return CommitInfo.EMPTY; } + @Override public boolean isReindexing() { return false; } + @Override public boolean isAsync() { return true; } + @Override public void indexUpdateFailed(Exception e) {} + @Override public void registerIndexCommitCallback(IndexCommitCallback callback) {} + } +}