diff --git a/rust/mlt-core/fuzz/Cargo.toml b/rust/mlt-core/fuzz/Cargo.toml
index 906bd347e..508c524e5 100644
--- a/rust/mlt-core/fuzz/Cargo.toml
+++ b/rust/mlt-core/fuzz/Cargo.toml
@@ -34,12 +34,20 @@ test = false
 doc = false
 bench = false
 
+[[bin]]
+name = "differential"
+path = "fuzz_targets/differential.rs"
+test = false
+doc = false
+bench = false
+
 [dependencies]
 arbitrary = { version = "1.4.2", features = ["derive"] }
 hex = "0.4.3"
 libfuzzer-sys = "0.4"
 mlt-core = { path = "..", features = ["arbitrary", "__private"] }
 pretty_assertions = "1.4"
+serde_json = "1"
 
 [lints.rust]
 unsafe_code = "forbid"
diff --git a/rust/mlt-core/fuzz/README.md b/rust/mlt-core/fuzz/README.md
index 49c00bb3d..19bcd20f3 100644
--- a/rust/mlt-core/fuzz/README.md
+++ b/rust/mlt-core/fuzz/README.md
@@ -69,6 +69,41 @@ Tests the `Layer` parser and serializer by generating arbitrary `LayerInput` val
 
 If a mismatch is found, the fuzzer panics with a detailed error message showing both the input and output in hexadecimal format.
 
+### `differential`
+
+**Location:** `fuzz_targets/differential.rs`
+
+Compares the Rust decoder against the C++ decoder.
+Each input is an arbitrary `StagedLayer` paired with an arbitrary `EncoderConfig`.
+The target:
+
+1. encodes it to MLT bytes with the Rust encoder,
+2. decodes those bytes with the Rust decoder to a `FeatureCollection` JSON, and
+3. decodes the same bytes with the C++ `mlt-cpp-json` tool, run as a subprocess.
+
+The two JSON outputs must match.
+A mismatch is a crash, and the report prints both outputs and the input bytes as hex.
+If the C++ tool fails to decode the bytes (e.g. an unsupported encoding), the
+input is skipped rather than treated as a failure.
+
+The target runs the prebuilt `mlt-cpp-json` binary, so no C++ code or build changes are needed.
+That binary is part of the cpp CMake project.
+Build it once, then set `$MLT_CPP_JSON_BIN` to its path:
+
+```bash
+# Build the C++ tool (from the repository root)
+cmake -S cpp -B cpp/build -DMLT_WITH_TESTS=OFF
+cmake --build cpp/build --target mlt-cpp-json
+
+# Run the fuzzer (from rust/mlt-core/fuzz)
+export MLT_CPP_JSON_BIN="$PWD/../../../cpp/build/tool/mlt-cpp-json"
+cargo +nightly fuzz run differential
+```
+
+Coverage comes from the instrumented Rust encode and decode path.
+The C++ decoder is a black-box oracle.
+Numbers are compared by value, so `0` and `0.0` count as equal; geometry coordinates are compared at `f32` precision.
+
 ## Corpus
 
 The `corpus/layer` directory contains input files that have been discovered during fuzzing. These serve as:
diff --git a/rust/mlt-core/fuzz/fuzz_targets/differential.rs b/rust/mlt-core/fuzz/fuzz_targets/differential.rs
new file mode 100644
index 000000000..2ca9b872f
--- /dev/null
+++ b/rust/mlt-core/fuzz/fuzz_targets/differential.rs
@@ -0,0 +1,8 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use mlt_fuzz::DifferentialInput;
+
+fuzz_target!(|input: DifferentialInput| {
+    input.fuzz();
+});
diff --git a/rust/mlt-core/fuzz/src/differential.rs b/rust/mlt-core/fuzz/src/differential.rs
new file mode 100644
index 000000000..16b5b9a6e
--- /dev/null
+++ b/rust/mlt-core/fuzz/src/differential.rs
@@ -0,0 +1,171 @@
+use std::path::PathBuf;
+use std::process::Command;
+use std::sync::OnceLock;
+
+use hex::ToHex as _;
+use mlt_core::encoder::{Codecs, Encoder, EncoderConfig, StagedLayer};
+use mlt_core::geojson::FeatureCollection;
+use mlt_core::{Decoder, Parser};
+
+/// An arbitrary tile and encoder config, encoded by Rust and decoded by both
+/// decoders. The Rust decoder and the C++ `mlt-cpp-json` tool must agree on the
+/// [`FeatureCollection`] JSON.
+#[derive(arbitrary::Arbitrary)]
+pub struct DifferentialInput {
+    pub layer: StagedLayer,
+    pub config: EncoderConfig,
+}
+
+impl DifferentialInput {
+    /// Runs one differential iteration over this input.
+    ///
+    /// # Panics
+    ///
+    /// Panics when encoding or the Rust decode fails, when either output is not
+    /// valid JSON, or when the two decoders disagree. Inputs the C++ tool
+    /// rejects are skipped without panicking.
+    pub fn fuzz(self) {
+        // Encode the arbitrary layer to MLT bytes with the fuzzed encoder
+        // config. These bytes are the shared input fed to both decoders.
+        let mut codecs = Codecs::default();
+        let buffer = self
+            .layer
+            .encode_into(Encoder::new(self.config), &mut codecs)
+            .expect("encode should not fail")
+            .into_layer_bytes()
+            .expect("into_layer_bytes should not fail");
+
+        let rust_json = rust_decode(&buffer);
+
+        // A C++ decode failure (unsupported technique, thrown exception) is not
+        // a mismatch. Skip these inputs and only flag genuine disagreements
+        // between the two decoders' output.
+        let Some(cpp_json) = cpp_decode(&buffer) else {
+            return;
+        };
+
+        let rust_value: serde_json::Value =
+            serde_json::from_str(&rust_json).expect("rust JSON should parse");
+        let cpp_value: serde_json::Value =
+            serde_json::from_str(&cpp_json).expect("C++ JSON should parse");
+
+        assert!(
+            json_eq(&rust_value, &cpp_value),
+            "Rust and C++ decoders disagree\n\
+             rust: {rust_json}\n\
+             cpp: {cpp_json}\n\
+             bytes: {}",
+            buffer.encode_hex::<String>()
+        );
+    }
+}
+
+/// Decode MLT bytes with the Rust decoder to `FeatureCollection` JSON.
+/// The format matches the output of `mlt-cpp-json`.
+fn rust_decode(buffer: &[u8]) -> String {
+    let layers = Parser::default()
+        .parse_layers(buffer)
+        .expect("layer must re-parse");
+    let parsed = Decoder::default()
+        .decode_all(layers)
+        .expect("decode should not fail");
+    let fc = FeatureCollection::from_layers(parsed).expect("FeatureCollection should build");
+    serde_json::to_string(&fc).expect("FeatureCollection should serialize")
+}
+
+/// Decode MLT bytes with the C++ `mlt-cpp-json` tool.
+/// Returns `None` when the tool exits non-zero, which covers decode errors and
+/// thrown exceptions.
+fn cpp_decode(buffer: &[u8]) -> Option<String> {
+    let path = temp_tile_path();
+    std::fs::write(path, buffer).expect("write temp tile");
+
+    let output = Command::new(cpp_json_bin())
+        .arg(path)
+        .output()
+        .expect("failed to run mlt-cpp-json");
+
+    if !output.status.success() {
+        return None;
+    }
+    Some(String::from_utf8(output.stdout).expect("C++ JSON should be valid UTF-8"))
+}
+
+/// Path to the `mlt-cpp-json` binary, read once from `$MLT_CPP_JSON_BIN`; panics if unset.
+fn cpp_json_bin() -> &'static str {
+    static BIN: OnceLock<String> = OnceLock::new();
+    BIN.get_or_init(|| {
+        std::env::var("MLT_CPP_JSON_BIN").unwrap_or_else(|_| {
+            panic!(
+                "set MLT_CPP_JSON_BIN to the path of the `mlt-cpp-json` binary \
+                 (build it via the cpp CMake project)"
+            )
+        })
+    })
+}
+
+/// A per-process temp file the C++ tool reads from.
+/// `mlt-cpp-json` only accepts a file path.
+/// Each input is written here and overwrites the previous one.
+fn temp_tile_path() -> &'static PathBuf {
+    static PATH: OnceLock<PathBuf> = OnceLock::new();
+    PATH.get_or_init(|| std::env::temp_dir().join(format!("mlt-diff-{}.mlt", std::process::id())))
+}
+
+/// Compares two JSON values structurally.
+/// Numbers are compared by value, so `0` and `0.0` count as equal.
+/// This stops the two JSON libraries' integer-vs-float formatting from
+/// reading as a difference.
+///
+/// Geometry coordinates are compared at `f32` precision because the C++ decoder
+/// stores coordinates as 32-bit `float` by design. Comparing them at `f64`
+/// would flag every coordinate above 2^24 as a difference and mask all other
+/// divergences. Properties and extent are still compared exactly.
+fn json_eq(a: &serde_json::Value, b: &serde_json::Value) -> bool {
+    json_eq_inner(a, b, false)
+}
+
+/// `coord` is true inside a geometry's `coordinates`, enabling `f32` tolerance.
+#[allow(
+    clippy::cast_possible_truncation,
+    reason = "intentional f64->f32 narrowing to match the C++ float coordinates"
+)]
+#[allow(
+    clippy::float_cmp,
+    reason = "exact equality is intended; the f32 cast is the only explicit tolerance"
+)]
+fn json_eq_inner(a: &serde_json::Value, b: &serde_json::Value, coord: bool) -> bool {
+    use serde_json::Value::{Array, Bool, Null, Number, Object, String};
+    match (a, b) {
+        (Null, Null) => true,
+        (Bool(x), Bool(y)) => x == y,
+        (String(x), String(y)) => x == y,
+        (Number(x), Number(y)) => match (x.as_f64(), y.as_f64()) {
+            // Same-kind integers compare exactly: `f64` would merge ids above 2^53.
+            _ if !coord && ((x.is_u64() && y.is_u64()) || (x.is_i64() && y.is_i64())) => x == y,
+            (Some(x), Some(y)) => x == y || (coord && x as f32 == y as f32),
+            _ => x == y,
+        },
+        (Array(x), Array(y)) => {
+            x.len() == y.len() && x.iter().zip(y).all(|(x, y)| json_eq_inner(x, y, coord))
+        }
+        (Object(x), Object(y)) => {
+            x.len() == y.len()
+                && x.iter().all(|(k, xv)| {
+                    y.get(k)
+                        .is_some_and(|yv| json_eq_inner(xv, yv, coord || k == "coordinates"))
+                })
+        }
+        _ => false,
+    }
+}
+
+impl std::fmt::Debug for DifferentialInput {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "DifferentialInput {{\n\tconfig: {:#?}\n\tlayer: {:#?}\n}}",
+            self.config, self.layer
+        )
+    }
+}
diff --git a/rust/mlt-core/fuzz/src/lib.rs b/rust/mlt-core/fuzz/src/lib.rs
index edc3e1461..e19473894 100644
--- a/rust/mlt-core/fuzz/src/lib.rs
+++ b/rust/mlt-core/fuzz/src/lib.rs
@@ -1,6 +1,8 @@
 mod decoded_layer;
+mod differential;
 mod layer;
 mod mvt_roundtrip;
 pub use decoded_layer::*;
+pub use differential::*;
 pub use layer::*;
 pub use mvt_roundtrip::*;
diff --git a/rust/mlt-core/src/encoder/fuzzing.rs b/rust/mlt-core/src/encoder/fuzzing.rs
index 8107237f3..cdd54bca9 100644
--- a/rust/mlt-core/src/encoder/fuzzing.rs
+++ b/rust/mlt-core/src/encoder/fuzzing.rs
@@ -3,7 +3,21 @@
 use arbitrary::{Arbitrary, Result, Unstructured};
 use crate::encoder::model::StagedLayer;
 use crate::encoder::optimizer::Presence;
-use crate::encoder::{StagedId, StagedProperty, StagedSharedDict, StagedStrings};
+use crate::encoder::{EncoderConfig, StagedId, StagedProperty, StagedSharedDict, StagedStrings};
+
+impl Arbitrary<'_> for EncoderConfig {
+    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
+        // Each optimization toggle is fuzzed independently via the public builder.
+        Ok(Self::default()
+            .with_tessellation(u.arbitrary()?)
+            .with_spatial_morton_sort(u.arbitrary()?)
+            .with_spatial_hilbert_sort(u.arbitrary()?)
+            .with_id_sort(u.arbitrary()?)
+            .with_fsst(u.arbitrary()?)
+            .with_fastpfor(u.arbitrary()?)
+            .with_shared_dict(u.arbitrary()?))
+    }
+}
+
 impl Arbitrary<'_> for StagedId {
     fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {