From 37085b83ace8ae48608063c6eda73214d1e47982 Mon Sep 17 00:00:00 2001 From: Yehor Malikov Date: Wed, 17 Jun 2026 22:10:53 +0200 Subject: [PATCH 1/3] verify: track written blocks for experimental replay experimental_verify replays the workload instead of keeping fio's normal per-write io_piece history. With norandommap, random writes may overwrite the same offset multiple times and leave other offsets untouched. During verify replay, fio can then read an offset that was never written and report a false "bad magic header 0" failure. Add a per-file bitmap for experimental_verify + norandommap to record which blocks were actually written. During replay, skip offsets that were never written. Mark bits only after writes are accepted for queueing or completion so serialize_overlap/FIO_Q_BUSY requeues do not create false written entries. Signed-off-by: Yehor Malikov --- backend.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fio.h | 1 + 2 files changed, 121 insertions(+) diff --git a/backend.c b/backend.c index 7f41bdfa46..7f7433b974 100644 --- a/backend.c +++ b/backend.c @@ -54,6 +54,7 @@ #include "pshared.h" #include "zone-dist.h" #include "fio_time.h" +#include "lib/axmap.h" static struct fio_sem *startup_sem; static struct flist_head *cgroup_list; @@ -614,6 +615,68 @@ static enum fio_q_status io_u_submit(struct thread_data *td, struct io_u *io_u) return td_io_queue(td, io_u); } +static struct axmap *ev_map_for_io_u(struct thread_data *td, struct io_u *io_u) +{ + if (!td->ev_maps || !io_u->file) + return NULL; + + return td->ev_maps[io_u->file->fileno]; +} + +static uint64_t ev_map_block(struct thread_data *td, struct io_u *io_u) +{ + struct fio_file *f = io_u->file; + + return (io_u->offset - f->file_offset) / (uint64_t) td->o.rw_min_bs; +} + +static uint64_t ev_map_nr_blocks(struct thread_data *td, unsigned long long len) +{ + return (len + td->o.rw_min_bs - 1) / (uint64_t) td->o.rw_min_bs; +} + +static void ev_map_mark(struct thread_data *td, struct 
io_u *io_u) +{ + struct axmap *map = ev_map_for_io_u(td, io_u); + uint64_t block, nr_blocks; + + if (!map) + return; + + block = ev_map_block(td, io_u); + nr_blocks = ev_map_nr_blocks(td, io_u->buflen); + axmap_set_nr(map, block, nr_blocks); +} + +static bool ev_map_init(struct thread_data *td) +{ + struct fio_file *f; + unsigned int i; + + if (td->ev_maps) + return true; + + td->ev_maps = calloc(td->files_index, sizeof(*td->ev_maps)); + if (!td->ev_maps) { + td_verror(td, ENOMEM, "experimental verify bitmap array"); + return false; + } + + for_each_file(td, f, i) { + uint64_t fsize = min(f->real_file_size, f->io_size); + uint64_t blocks = (fsize + td->o.rw_min_bs - 1) / + (uint64_t) td->o.rw_min_bs; + + td->ev_maps[i] = axmap_new(blocks); + if (!td->ev_maps[i]) { + td_verror(td, ENOMEM, "experimental verify bitmap"); + return false; + } + } + + return true; +} + /* * The main verify engine. Runs over the writes we previously submitted, * reads the blocks back in, and checks the crc/md5 of the data. @@ -720,6 +783,9 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes) goto reap; } + if (io_u->end_io) + break; + /* * We are only interested in the places where * we wrote or trimmed IOs. 
Turn those into @@ -742,9 +808,33 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes) } break; } else if (io_u->ddir == DDIR_WRITE) { + struct axmap *map = ev_map_for_io_u(td, io_u); + uint64_t block; + io_u->ddir = DDIR_READ; io_u->numberio = td->verify_read_issues; td->verify_read_issues++; + + if (map) { + uint64_t nr_blocks = ev_map_nr_blocks(td, io_u->buflen); + bool written = true; + uint64_t b; + + block = ev_map_block(td, io_u); + for (b = 0; b < nr_blocks; b++) { + if (!axmap_isset(map, block + b)) { + written = false; + break; + } + } + + if (!written) { + td->bytes_verified += io_u->buflen; + put_io_u(td, io_u); + continue; + } + } + populate_verify_io_u(td, io_u); if (td_io_prep(td, io_u)) { put_io_u(td, io_u); @@ -1334,6 +1424,16 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done) if (td->error) break; + /* + * Mark written blocks before handing the io_u to the + * submission workqueue. Once enqueued, the io_u is owned + * by the offload worker thread and may be submitted, + * completed, and recycled before enqueue returns. 
+ */ + if (td_write(td) && ddir == DDIR_WRITE && + td->o.experimental_verify) + ev_map_mark(td, io_u); + workqueue_enqueue(&td->io_wq, &io_u->work); ret = FIO_Q_QUEUED; @@ -1350,6 +1450,11 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done) } else { ret = io_u_submit(td, io_u); + if (td_write(td) && ddir == DDIR_WRITE && + td->o.experimental_verify && + (ret == FIO_Q_QUEUED || + (ret == FIO_Q_COMPLETED && !io_u->error))) + ev_map_mark(td, io_u); if (ddir_rw(ddir) && should_check_rate(td)) td->rate_next_io_time[ddir] = usec_for_io(td, ddir); @@ -2213,6 +2318,10 @@ static void *thread_main(void *data) prune_io_piece_log(td); + if (o->experimental_verify && o->norandommap && o->do_verify && + o->verify != VERIFY_NONE && !ev_map_init(td)) + break; + if (td->o.verify_only && td_write(td)) { verify_bytes = do_dry_run(td); if (!verify_bytes) @@ -2365,6 +2474,17 @@ static void *thread_main(void *data) verify_free_state(td); td_zone_free_index(td); + if (td->ev_maps) { + unsigned int i; + + for (i = 0; i < td->files_index; i++) { + if (td->ev_maps[i]) + axmap_free(td->ev_maps[i]); + } + free(td->ev_maps); + td->ev_maps = NULL; + } + if (fio_option_is_set(o, cpumask)) { ret = fio_cpuset_exit(&o->cpumask); if (ret) diff --git a/fio.h b/fio.h index b05abf770f..86ec5a0257 100644 --- a/fio.h +++ b/fio.h @@ -461,6 +461,7 @@ struct thread_data { struct rb_root io_hist_tree; struct flist_head io_hist_list; unsigned long io_hist_len; + struct axmap **ev_maps; /* * For IO replaying From e49373fd0a73f9dbe42bc35c3543dd1e81953c55 Mon Sep 17 00:00:00 2001 From: Yehor Malikov Date: Wed, 17 Jun 2026 22:11:29 +0200 Subject: [PATCH 2/3] verify: rebuild experimental replay bitmap for verify_only A verify_only run starts in a fresh fio process, so it cannot reuse the bitmap built during the original write run. For experimental_verify with norandommap, rebuild the bitmap during the dry-run pass by replaying the write workload without issuing writes. 
Reset replay-sensitive counters and file state before the verify read pass so experimental_verify replays the same offset sequence again and checks only blocks that were actually written. Signed-off-by: Yehor Malikov --- backend.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/backend.c b/backend.c index 7f7433b974..c4db939bb1 100644 --- a/backend.c +++ b/backend.c @@ -2035,6 +2035,10 @@ static uint64_t do_dry_run(struct thread_data *td) log_io_piece(td, io_u); } + if (td_write(td) && io_u->ddir == DDIR_WRITE && + td->o.experimental_verify) + ev_map_mark(td, io_u); + ret = io_u_sync_complete(td, io_u); (void) ret; } @@ -2042,6 +2046,23 @@ static uint64_t do_dry_run(struct thread_data *td) return td->bytes_done[DDIR_WRITE] + td->bytes_done[DDIR_TRIM]; } +static void reset_experimental_verify_state(struct thread_data *td) +{ + struct fio_file *f; + unsigned int i; + + memset(td->io_issues, 0, sizeof(td->io_issues)); + memset(td->io_issue_bytes, 0, sizeof(td->io_issue_bytes)); + td->verify_read_issues = 0; + td->bytes_verified = 0; + td->ddir_seq_nr = 1; + td->last_ddir_issued = DDIR_INVAL; + td->last_ddir_completed = DDIR_INVAL; + + for_each_file(td, f, i) + fio_file_reset(td, f); +} + struct fork_data { struct thread_data *td; struct sk_out *sk_out; @@ -2391,6 +2412,16 @@ static void *thread_main(void *data) continue; clear_io_state(td, 0); + /* + * Only verify_only runs replay the write workload via + * do_dry_run() before the verify pass, so only they need the + * replay-sensitive counters and file state reset here. Doing + * this for ordinary write+verify runs would zero + * verify_read_issues mid-stream and break numberio accounting + * across loops/time_based jobs. 
+ */ + if (o->experimental_verify && o->verify_only) + reset_experimental_verify_state(td); fio_gettime(&td->start, NULL); From 4b593bb4de8cf1577992dbe0352ced066454755f Mon Sep 17 00:00:00 2001 From: Yehor Malikov Date: Wed, 17 Jun 2026 22:11:39 +0200 Subject: [PATCH 3/3] verify: add coverage for experimental replay Enable verify_only coverage for experimental_verify now that replay state is reset correctly, and add an experimental_verify case to the verify header test matrix. Also disable write sequence checking for experimental_verify overlap-risk workloads unless explicitly requested, since duplicate overwrites cannot be sequence-verified without the full write history. Signed-off-by: Yehor Malikov --- init.c | 4 ++++ t/verify.py | 20 ++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/init.c b/init.c index d1bf6bfbcf..7f332417fe 100644 --- a/init.c +++ b/init.c @@ -933,6 +933,10 @@ static int fixup_options(struct thread_data *td) o->verify_write_sequence = 0; } + if (o->experimental_verify && fio_offset_overlap_risk(td) && + !fio_option_is_set(o, verify_write_sequence)) + o->verify_write_sequence = 0; + /* * Verify header should not be offset beyond the verify * interval. 
diff --git a/t/verify.py b/t/verify.py index 4c3d0a3c61..00603c1781 100755 --- a/t/verify.py +++ b/t/verify.py @@ -301,6 +301,19 @@ def check_result(self): "test_class": VerifyTest, "success": SUCCESS_DEFAULT, }, + { + # Basic test using experimental verify replay + "test_id": 2004, + "fio_opts": { + "ioengine": "libaio", + "filesize": "1M", + "bs": 4096, + "experimental_verify": 1, + "output-format": "json", + }, + "test_class": VerifyTest, + "success": SUCCESS_DEFAULT, + }, ] # @@ -541,12 +554,7 @@ def verify_test_header(test_env, args, csum, mode, sequence): {sequential, random w/randommap, random w/norandommap, sequence modifiers} """ for test in TEST_LIST_HEADER: - # experimental_verify does not work in verify_only=1 mode - if "_vo" in mode and 'experimental_verify' in test['fio_opts'] and \ - test['fio_opts']['experimental_verify']: - test['force_skip'] = True - else: - test['force_skip'] = False + test['force_skip'] = False test['fio_opts']['verify'] = csum if csum in ('pattern', 'pattern_hdr'):