diff --git a/docs/inkless/configs.rst b/docs/inkless/configs.rst
index 5c5e9b487c8..86de9ed18bf 100644
--- a/docs/inkless/configs.rst
+++ b/docs/inkless/configs.rst
@@ -71,8 +71,23 @@ Under ``inkless.``
* Valid Values: [1,...]
* Importance: medium
+``produce.pipelined.enabled``
+ Whether to use the pipelined writer instead of the lock-based writer. The pipelined writer uses a SEDA architecture to eliminate lock contention: validation is parallelized across N worker threads, and buffer writing is handled by a single dedicated thread. This eliminates the global writer lock bottleneck.
+
+ * Type: boolean
+ * Default: false
+ * Importance: medium
+
+``produce.pipelined.validation.threads``
+ Number of validation worker threads for the pipelined writer. These threads perform CPU-intensive validation work (CRC validation, size checks, offset assignment) in parallel. A value of 0 means auto-detect (uses available processors).
+
+ * Type: int
+ * Default: 0
+ * Valid Values: [0,...]
+ * Importance: medium
+
``produce.upload.backoff.ms``
- The number of millisecond to back off for before the next upload attempt.
+ The number of milliseconds to back off for before the next upload attempt.
* Type: int
* Default: 10
diff --git a/storage/inkless/src/main/java/io/aiven/inkless/config/InklessConfig.java b/storage/inkless/src/main/java/io/aiven/inkless/config/InklessConfig.java
index 12f08fc7f86..704db7e1b44 100644
--- a/storage/inkless/src/main/java/io/aiven/inkless/config/InklessConfig.java
+++ b/storage/inkless/src/main/java/io/aiven/inkless/config/InklessConfig.java
@@ -64,9 +64,24 @@ public class InklessConfig extends AbstractConfig {
private static final int PRODUCE_MAX_UPLOAD_ATTEMPTS_DEFAULT = 3;
public static final String PRODUCE_UPLOAD_BACKOFF_MS_CONFIG = PRODUCE_PREFIX + "upload.backoff.ms";
- private static final String PRODUCE_UPLOAD_BACKOFF_MS_DOC = "The number of millisecond to back off for before the next upload attempt.";
+ private static final String PRODUCE_UPLOAD_BACKOFF_MS_DOC = "The number of milliseconds to back off for before the next upload attempt.";
private static final int PRODUCE_UPLOAD_BACKOFF_MS_DEFAULT = 10;
+ public static final String PRODUCE_PIPELINED_PREFIX = PRODUCE_PREFIX + "pipelined.";
+
+ public static final String PRODUCE_PIPELINED_ENABLED_CONFIG = PRODUCE_PIPELINED_PREFIX + "enabled";
+ private static final String PRODUCE_PIPELINED_ENABLED_DOC = "Whether to use the pipelined writer instead of the lock-based writer. "
+ + "The pipelined writer uses a SEDA architecture to eliminate lock contention: "
+ + "validation is parallelized across N worker threads, and buffer writing is handled by a single dedicated thread. "
+ + "This eliminates the global writer lock bottleneck.";
+ private static final boolean PRODUCE_PIPELINED_ENABLED_DEFAULT = false;
+
+ public static final String PRODUCE_PIPELINED_VALIDATION_THREADS_CONFIG = PRODUCE_PIPELINED_PREFIX + "validation.threads";
+ private static final String PRODUCE_PIPELINED_VALIDATION_THREADS_DOC = "Number of validation worker threads for the pipelined writer. "
+ + "These threads perform CPU-intensive validation work (CRC validation, size checks, offset assignment) in parallel. "
+ + "A value of 0 means auto-detect (uses available processors).";
+ private static final int PRODUCE_PIPELINED_VALIDATION_THREADS_DEFAULT = 0;
+
public static final String STORAGE_PREFIX = "storage.";
public static final String STORAGE_BACKEND_CLASS_CONFIG = STORAGE_PREFIX + "backend.class";
@@ -261,6 +276,23 @@ public static ConfigDef configDef() {
PRODUCE_UPLOAD_BACKOFF_MS_DOC
);
+ configDef.define(
+ PRODUCE_PIPELINED_ENABLED_CONFIG,
+ ConfigDef.Type.BOOLEAN,
+ PRODUCE_PIPELINED_ENABLED_DEFAULT,
+ ConfigDef.Importance.MEDIUM,
+ PRODUCE_PIPELINED_ENABLED_DOC
+ );
+
+ configDef.define(
+ PRODUCE_PIPELINED_VALIDATION_THREADS_CONFIG,
+ ConfigDef.Type.INT,
+ PRODUCE_PIPELINED_VALIDATION_THREADS_DEFAULT,
+ ConfigDef.Range.atLeast(0),
+ ConfigDef.Importance.MEDIUM,
+ PRODUCE_PIPELINED_VALIDATION_THREADS_DOC
+ );
+
configDef.define(
STORAGE_BACKEND_CLASS_CONFIG,
ConfigDef.Type.CLASS,
@@ -544,6 +576,14 @@ public Duration produceUploadBackoff() {
return Duration.ofMillis(getInt(PRODUCE_UPLOAD_BACKOFF_MS_CONFIG));
}
+ public boolean producePipelinedEnabled() {
+ return getBoolean(PRODUCE_PIPELINED_ENABLED_CONFIG); // true selects the pipelined writer over the lock-based writer
+ }
+
+ public int producePipelinedValidationThreads() {
+ return getInt(PRODUCE_PIPELINED_VALIDATION_THREADS_CONFIG); // raw configured value: 0 (documented as auto-detect) is NOT resolved here
+ }
+
public int fetchCacheBlockBytes() {
return getInt(CONSUME_CACHE_BLOCK_BYTES_CONFIG);
}
diff --git a/storage/inkless/src/main/java/io/aiven/inkless/produce/ActiveFile.java b/storage/inkless/src/main/java/io/aiven/inkless/produce/ActiveFile.java
index 1f02f23a61d..f2c1ac2f72c 100644
--- a/storage/inkless/src/main/java/io/aiven/inkless/produce/ActiveFile.java
+++ b/storage/inkless/src/main/java/io/aiven/inkless/produce/ActiveFile.java
@@ -25,6 +25,7 @@
import org.apache.kafka.common.errors.RecordTooLargeException;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.record.MemoryRecords;
+import org.apache.kafka.common.record.MutableRecordBatch;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.kafka.common.requests.ProduceResponse;
import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
@@ -189,6 +190,59 @@ int size() {
return buffer.totalSize();
}
+ /**
+ * Adds a pre-validated batch directly to the buffer.
+ *
+ * <p>This method is used by {@link PipelinedWriter} where validation is done
+ * in a separate stage (validation workers) before reaching the buffer writer.
+ * The batch is assumed to be already validated, so this method only adds it
+ * to the buffer without re-validation. If {@code start} has not been set yet, it is
+ * initialized from {@code TimeUtils.durationMeasurementNow(time)} before the batch is appended.
+ * NOTE(review): no synchronization is visible here; presumably single-threaded use only - confirm.
+ * @param topicIdPartition the partition for the batch
+ * @param recordBatch the pre-validated record batch
+ * @param requestId the request ID for tracking
+ */
+ void addBatchDirect(
+ final TopicIdPartition topicIdPartition,
+ final MutableRecordBatch recordBatch,
+ final int requestId
+ ) {
+ if (start == null) {
+ start = TimeUtils.durationMeasurementNow(time);
+ }
+ buffer.addBatch(topicIdPartition, recordBatch, requestId);
+ }
+
+ /**
+ * Registers a future to be completed when the file is committed.
+ *
+ * <p>This method is used by {@link PipelinedWriter} to store the result future
+ * along with the original request data. The future will be completed by
+ * {@link AppendCompleter} when the file commit succeeds or fails.
+ *
+ * @param requestId the request ID
+ * @param resultFuture the future to complete with the partition responses
+ * @param originalRecords the original records from the produce request
+ * @param invalidBatches the batches that failed validation (already completed with errors)
+ */
+ void addAwaitingFuture(
+ final int requestId,
+ final CompletableFuture