Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions rust/mlt-core/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,22 @@ test = false
doc = false
bench = false

# Differential fuzzer comparing the Rust decoder and the C++ `mlt-cpp-json`
# tool. Set $MLT_CPP_JSON_BIN to the built binary before running.
[[bin]]
name = "differential"
path = "fuzz_targets/differential.rs"
test = false
doc = false
bench = false

[dependencies]
arbitrary = { version = "1.4.2", features = ["derive"] }
hex = "0.4.3"
libfuzzer-sys = "0.4"
mlt-core = { path = "..", features = ["arbitrary", "__private"] }
pretty_assertions = "1.4"
serde_json = "1"

[lints.rust]
unsafe_code = "forbid"
Expand Down
35 changes: 35 additions & 0 deletions rust/mlt-core/fuzz/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,41 @@ Tests the `Layer` parser and serializer by generating arbitrary `LayerInput` val

If a mismatch is found, the fuzzer panics with a detailed error message showing both the input and output in hexadecimal format.

### `differential`

**Location:** `fuzz_targets/differential.rs`

Compares the Rust decoder against the C++ decoder.
Each input is an arbitrary `StagedLayer`.
The target:

1. encodes it to MLT bytes with the Rust encoder,
2. decodes those bytes with the Rust decoder to a `FeatureCollection` JSON, and
3. decodes the same bytes with the C++ `mlt-cpp-json` tool, run as a subprocess.

The two JSON outputs must match.
A mismatch is a crash.
The C++ tool failing on bytes the Rust decoder accepted is also a crash.
The crash report prints both outputs and the input bytes as hex.

The target runs the prebuilt `mlt-cpp-json` binary, so no C++ code or build changes are needed.
That binary is part of the cpp CMake project.
Build it once, then set `$MLT_CPP_JSON_BIN` to its path:

```bash
# Build the C++ tool (from the repository root)
cmake -S cpp -B cpp/build -DMLT_WITH_TESTS=OFF
cmake --build cpp/build --target mlt-cpp-json

# Run the fuzzer (from rust/mlt-core/fuzz)
export MLT_CPP_JSON_BIN="$PWD/../../../cpp/build/tool/mlt-cpp-json"
cargo +nightly fuzz run differential
```

Coverage comes from the instrumented Rust encode and decode path.
The C++ decoder is a black-box oracle.
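Numbers are compared by value, so `0` and `0.0` count as equal.
Geometry coordinates are compared at `f32` precision, because the C++ decoder stores them as 32-bit floats.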

## Corpus

The `corpus/layer` directory contains input files that have been discovered during fuzzing. These serve as:
Expand Down
8 changes: 8 additions & 0 deletions rust/mlt-core/fuzz/fuzz_targets/differential.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#![no_main]

use libfuzzer_sys::fuzz_target;
use mlt_fuzz::DifferentialInput;

// libFuzzer entry point: every case arrives as a structured
// `DifferentialInput`, and the whole comparison lives in its `fuzz` method.
fuzz_target!(|case: DifferentialInput| {
    DifferentialInput::fuzz(case);
});
164 changes: 164 additions & 0 deletions rust/mlt-core/fuzz/src/differential.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
use std::path::PathBuf;
use std::process::Command;
use std::sync::OnceLock;

use hex::ToHex as _;
use mlt_core::encoder::{Codecs, Encoder, EncoderConfig, StagedLayer};
use mlt_core::geojson::FeatureCollection;
use mlt_core::{Decoder, Parser};

/// An arbitrary tile and encoder config, encoded by Rust and decoded by both
/// decoders. The Rust decoder and the C++ `mlt-cpp-json` tool must agree on the
/// [`FeatureCollection`] JSON.
///
/// NOTE(review): the derived `Arbitrary` presumably draws fuzz bytes in field
/// declaration order, so reordering the fields would reinterpret an existing
/// corpus; confirm before reordering.
#[derive(arbitrary::Arbitrary)]
pub struct DifferentialInput {
/// The staged layer that [`DifferentialInput::fuzz`] encodes to MLT bytes.
pub layer: StagedLayer,
/// Encoder settings handed to `Encoder::new` for that encoding step.
pub config: EncoderConfig,
}

impl DifferentialInput {
    /// Runs one differential case.
    ///
    /// The layer is encoded with the fuzzed config, the resulting bytes are
    /// decoded by the Rust decoder and by the C++ tool, and the two JSON
    /// documents are compared with `json_eq`.
    ///
    /// # Panics
    ///
    /// Panics (which the fuzzer records as a crash) when encoding fails, when
    /// the C++ tool yields no output for bytes the Rust decoder accepted, when
    /// either output is not valid JSON, or when the two documents differ.
    pub fn fuzz(self) {
        let Self { layer, config } = self;

        // Both decoders are fed exactly these bytes.
        let mut codecs = Codecs::default();
        let encoded = layer.encode_into(Encoder::new(config), &mut codecs);
        let tile = encoded
            .expect("encode should not fail")
            .into_layer_bytes()
            .expect("into_layer_bytes should not fail");

        // The Rust side goes first so its output is available for the report
        // if the C++ side fails.
        let rust_json = rust_decode(&tile);
        let Some(cpp_json) = cpp_decode(&tile) else {
            panic!(
                "C++ decoder failed on bytes the Rust decoder accepted\n\
                rust output: {rust_json}\n\
                bytes: {}",
                tile.encode_hex::<String>()
            );
        };

        let rust_value =
            serde_json::from_str::<serde_json::Value>(&rust_json).expect("rust JSON should parse");
        let cpp_value =
            serde_json::from_str::<serde_json::Value>(&cpp_json).expect("C++ JSON should parse");

        assert!(
            json_eq(&rust_value, &cpp_value),
            "Rust and C++ decoders disagree\n\
            rust: {rust_json}\n\
            cpp: {cpp_json}\n\
            bytes: {}",
            tile.encode_hex::<String>()
        );
    }
}

/// Runs the Rust pipeline on `buffer`: parse the layers, decode them, build a
/// `FeatureCollection`, and serialize it to a JSON string.
/// The format matches the output of `mlt-cpp-json`.
///
/// # Panics
///
/// Panics if any of the four steps returns an error.
fn rust_decode(buffer: &[u8]) -> String {
    let raw_layers = Parser::default().parse_layers(buffer);
    let raw_layers = raw_layers.expect("layer must re-parse");

    let decoded_layers = Decoder::default().decode_all(raw_layers);
    let decoded_layers = decoded_layers.expect("decode should not fail");

    let collection = FeatureCollection::from_layers(decoded_layers);
    let collection = collection.expect("FeatureCollection should build");

    serde_json::to_string(&collection).expect("FeatureCollection should serialize")
}

/// Decode MLT bytes with the C++ `mlt-cpp-json` tool.
///
/// The bytes are written to the per-process temp file, the tool is run on that
/// file as a subprocess, and its captured stdout is returned.
///
/// Returns `None` when the tool exits unsuccessfully, which covers decode
/// errors and thrown exceptions. In that case the exit status and the tool's
/// stderr are printed to this process's stderr, so the crash report contains
/// the C++ diagnostic instead of only "C++ decoder failed".
///
/// # Panics
///
/// Panics if the temp file cannot be written, the tool cannot be started, or
/// its stdout is not valid UTF-8.
fn cpp_decode(buffer: &[u8]) -> Option<String> {
    let path = temp_tile_path();
    std::fs::write(path, buffer).expect("write temp tile");

    let output = Command::new(cpp_json_bin())
        .arg(path)
        .output()
        .expect("failed to run mlt-cpp-json");

    if !output.status.success() {
        // Without this, the reason for the failure is lost; the caller only
        // learns that the tool failed.
        eprintln!(
            "mlt-cpp-json failed ({}); stderr:\n{}",
            output.status,
            String::from_utf8_lossy(&output.stderr)
        );
        return None;
    }
    Some(String::from_utf8(output.stdout).expect("C++ JSON should be valid UTF-8"))
}

/// Returns the `mlt-cpp-json` binary path taken from `$MLT_CPP_JSON_BIN`.
///
/// The variable is read on the first call only; the value is cached in a
/// `OnceLock` and later calls return the cached string.
///
/// # Panics
///
/// Panics on the first call if the variable cannot be read.
fn cpp_json_bin() -> &'static str {
    static BIN: OnceLock<String> = OnceLock::new();
    let cached = BIN.get_or_init(|| match std::env::var("MLT_CPP_JSON_BIN") {
        Ok(bin) => bin,
        Err(_) => panic!(
            "set MLT_CPP_JSON_BIN to the path of the `mlt-cpp-json` binary \
            (build it via the cpp CMake project)"
        ),
    });
    cached.as_str()
}

/// Location of the scratch tile file handed to the C++ tool.
/// `mlt-cpp-json` only accepts a file path, so every input is written to this
/// one file, replacing the previous input.
///
/// The name embeds the current process id and is computed once, then cached.
fn temp_tile_path() -> &'static PathBuf {
    static PATH: OnceLock<PathBuf> = OnceLock::new();
    PATH.get_or_init(|| {
        let file_name = format!("mlt-diff-{}.mlt", std::process::id());
        let mut tile_path = std::env::temp_dir();
        tile_path.push(file_name);
        tile_path
    })
}

/// Structural equality of two JSON documents, as used by the differential
/// check.
///
/// Numbers are matched by numeric value rather than by spelling, so `0` and
/// `0.0` are equal; otherwise the two JSON libraries' integer-vs-float
/// formatting would read as a difference.
///
/// Numbers inside a `coordinates` member are additionally accepted when they
/// agree after narrowing to `f32`, because the C++ decoder stores coordinates
/// as 32-bit `float` by design. Without that, every coordinate above 2^24 would
/// be flagged and would mask all other divergences. Everywhere else, numbers
/// must be equal by value.
fn json_eq(a: &serde_json::Value, b: &serde_json::Value) -> bool {
    // The walk starts at the document root, which is outside any
    // `coordinates` member.
    let inside_coordinates = false;
    json_eq_inner(a, b, inside_coordinates)
}

/// Recursive worker for the structural JSON comparison.
///
/// `coord` is true inside a geometry's `coordinates`, enabling `f32` tolerance.
/// It is switched on when descending into an object member named
/// `coordinates` and stays on for everything nested below it.
///
/// Number rules:
/// * two integers are compared exactly, so distinct values above 2^53 (large
///   `u64`/`i64` ids or properties) are not conflated by a lossy `f64`
///   conversion;
/// * otherwise the values are compared as `f64`, so `0` and `0.0` are equal;
/// * with `coord` set, values that agree after narrowing to `f32` are also
///   accepted.
///
/// Arrays must have equal length and pairwise-equal elements. Objects must
/// have equal size and equal values for every key. Values of different JSON
/// types are never equal.
#[allow(
    clippy::cast_possible_truncation,
    reason = "intentional f64->f32 narrowing to match the C++ float coordinates"
)]
fn json_eq_inner(a: &serde_json::Value, b: &serde_json::Value, coord: bool) -> bool {
    use serde_json::Value::{Array, Bool, Null, Number, Object, String};

    /// Exact integer value of `n`, widened so the full `i64` and `u64` ranges
    /// fit in one type; `None` when `n` is not an integer.
    fn exact_int(n: &serde_json::Number) -> Option<i128> {
        n.as_i64()
            .map(i128::from)
            .or_else(|| n.as_u64().map(i128::from))
    }

    match (a, b) {
        (Null, Null) => true,
        (Bool(x), Bool(y)) => x == y,
        (String(x), String(y)) => x == y,
        (Number(x), Number(y)) => {
            let same_value = match (exact_int(x), exact_int(y)) {
                // Both sides are integers: compare without going through f64.
                (Some(x), Some(y)) => x == y,
                // At least one side is a float: compare by f64 value.
                _ => match (x.as_f64(), y.as_f64()) {
                    (Some(x), Some(y)) => x == y || (x.is_nan() && y.is_nan()),
                    _ => x == y,
                },
            };
            same_value
                || (coord
                    && matches!(
                        (x.as_f64(), y.as_f64()),
                        (Some(x), Some(y)) if x as f32 == y as f32
                    ))
        }
        (Array(x), Array(y)) => {
            x.len() == y.len() && x.iter().zip(y).all(|(x, y)| json_eq_inner(x, y, coord))
        }
        (Object(x), Object(y)) => {
            x.len() == y.len()
                && x.iter().all(|(k, xv)| {
                    y.get(k)
                        .is_some_and(|yv| json_eq_inner(xv, yv, coord || k == "coordinates"))
                })
        }
        _ => false,
    }
}

/// Hand-written `Debug` output: the config is printed first and the layer
/// second, each pretty-printed on its own tab-indented line.
impl std::fmt::Debug for DifferentialInput {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("DifferentialInput {\n")?;
        writeln!(f, "\tconfig: {:#?}", self.config)?;
        writeln!(f, "\tlayer: {:#?}", self.layer)?;
        f.write_str("}")
    }
}
2 changes: 2 additions & 0 deletions rust/mlt-core/fuzz/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
mod decoded_layer;
mod differential;
mod layer;
mod mvt_roundtrip;
pub use decoded_layer::*;
pub use differential::*;
pub use layer::*;
pub use mvt_roundtrip::*;
16 changes: 15 additions & 1 deletion rust/mlt-core/src/encoder/fuzzing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,21 @@ use arbitrary::{Arbitrary, Result, Unstructured};

use crate::encoder::model::StagedLayer;
use crate::encoder::optimizer::Presence;
use crate::encoder::{StagedId, StagedProperty, StagedSharedDict, StagedStrings};
use crate::encoder::{EncoderConfig, StagedId, StagedProperty, StagedSharedDict, StagedStrings};

impl Arbitrary<'_> for EncoderConfig {
    /// Starts from the default config and sets each optimization toggle from
    /// its own fuzzer-drawn value, using the public builder methods.
    ///
    /// The draws happen in the order written below; changing that order would
    /// change how existing fuzz inputs are interpreted.
    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
        let mut config = Self::default();
        config = config.with_tessellation(u.arbitrary()?);
        config = config.with_spatial_morton_sort(u.arbitrary()?);
        config = config.with_spatial_hilbert_sort(u.arbitrary()?);
        config = config.with_id_sort(u.arbitrary()?);
        config = config.with_fsst(u.arbitrary()?);
        config = config.with_fastpfor(u.arbitrary()?);
        config = config.with_shared_dict(u.arbitrary()?);
        Ok(config)
    }
}

impl Arbitrary<'_> for StagedId {
fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
Expand Down
Loading