Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 25 additions & 28 deletions benches/stdlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ criterion_group!(
decode_percent,
decode_punycode,
decrypt,
dns_lookup,
// dns_lookup makes network calls, so it doesn't make sense to benchmark it
//dns_lookup,
del,
decrypt_ip,
downcase,
Expand Down Expand Up @@ -141,7 +142,8 @@ criterion_group!(
redact,
remove,
replace,
reverse_dns,
// reverse_dns makes network calls, so it doesn't make sense to benchmark it
//reverse_dns,
round,
seahash,
set,
Expand Down Expand Up @@ -797,27 +799,27 @@ bench_function! {
ip_ntop => vrl::stdlib::IpNtop;

ipv4 {
args: func_args![value: "1.2.3.4"],
want: Ok(value!("\x01\x02\x03\x04")),
args: func_args![value: "\x01\x02\x03\x04"],
want: Ok(value!("1.2.3.4")),
}

ipv6 {
args: func_args![value: "102:304:506:708:90a:b0c:d0e:f10"],
want: Ok(value!("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10")),
args: func_args![value: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"],
want: Ok(value!("102:304:506:708:90a:b0c:d0e:f10")),
}
}

bench_function! {
ip_pton => vrl::stdlib::IpPton;

ipv4 {
args: func_args![value: "\x01\x02\x03\x04"],
want: Ok(value!("1.2.3.4")),
args: func_args![value: "1.2.3.4"],
want: Ok(value!("\x01\x02\x03\x04")),
}

ipv6 {
args: func_args![value: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"],
want: Ok(value!("102:304:506:708:90a:b0c:d0e:f10")),
args: func_args![value: "102:304:506:708:90a:b0c:d0e:f10"],
want: Ok(value!("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10")),
}
}

Expand Down Expand Up @@ -963,7 +965,7 @@ bench_function! {

not_string {
args: func_args![value: 42],
want: Ok(false),
want: Err("expected string, got integer"),
}

ipv4 {
Expand All @@ -987,7 +989,7 @@ bench_function! {

not_string {
args: func_args![value: 42],
want: Ok(false),
want: Err("expected string, got integer"),
}

ipv4 {
Expand Down Expand Up @@ -1020,7 +1022,7 @@ bench_function! {
}

exact_variant {
args: func_args![value: r#"{"key": "value""#, variant: "object"],
args: func_args![value: r#"{"key": "value"}"#, variant: "object"],
want: Ok(true),
}
}
Expand Down Expand Up @@ -1263,7 +1265,7 @@ bench_function! {
}

equals_facet {
args: func_args![value: value!({"custom": {"z": 1}}), query: "@z:1"],
args: func_args![value: value!({"z": 1}), query: "@z:1"],
want: Ok(true),
}

Expand All @@ -1278,7 +1280,7 @@ bench_function! {
}

wildcard_suffix_facet {
args: func_args![value: value!({"custom": {"a": "vector"}}), query: "@a:vec*"],
args: func_args![value: value!({"a": "vector"}), query: "@a:vec*"],
want: Ok(true),
}

Expand Down Expand Up @@ -1323,7 +1325,7 @@ bench_function! {
}

kitchen_sink_2 {
args: func_args![value: value!({"tags": ["c:that", "d:the_other"], "custom": {"b": "testing", "e": 3}}), query: "host:this OR ((@b:test* AND c:that) AND d:the_other @e:[1 TO 5])"],
args: func_args![value: value!({"tags": ["c:that", "d:the_other"], "b": "testing", "e": 3}), query: "host:this OR ((@b:test* AND c:that) AND d:the_other @e:[1 TO 5])"],
want: Ok(true),
}
}
Expand Down Expand Up @@ -1467,6 +1469,7 @@ bench_function! {
"target_status_code_list": ["200"],
"timestamp": "2018-07-02T22:23:00.186641Z",
"trace_id": "Root=1-58337262-36d228ad5d99923122bbe354",
"traceability_id": null,
"type": "http",
"user_agent": "curl/7.46.0"
})),
Expand Down Expand Up @@ -1528,7 +1531,7 @@ bench_function! {
format: "version interface_id account_id vpc_id subnet_id instance_id srcaddr dstaddr srcport dstport protocol tcp_flags type pkt_srcaddr pkt_dstaddr action log_status",
],
want: Ok(value!({
"account_id": 123456789010i64,
"account_id": "123456789010",
"action": "ACCEPT",
"dstaddr": "10.40.2.236",
"dstport": 80,
Expand Down Expand Up @@ -1964,17 +1967,14 @@ bench_function! {

combined {
args: func_args![
value: r#"172.17.0.1 alice - [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75""#,
value: r#"172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75""#,
format: "combined",
],
want: Ok(value!({
"client": "172.17.0.1",
"user": "alice",
"timestamp": (DateTime::parse_from_rfc3339("2021-04-01T12:02:31Z").unwrap().with_timezone(&Utc)),
"request": "POST /not-found HTTP/1.1",
"method": "POST",
"path": "/not-found",
"protocol": "HTTP/1.1",
"status": 404,
"size": 153,
"referer": "http://localhost/somewhere",
Expand All @@ -1992,9 +1992,6 @@ bench_function! {
"remote_addr": "0.0.0.0",
"timestamp": (DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().with_timezone(&Utc)),
"request": "GET /some/path HTTP/2.0",
"method": "GET",
"path": "/some/path",
"protocol": "HTTP/2.0",
"status": 200,
"body_bytes_size": 12312,
"http_referer": "https://10.0.0.1/some/referer",
Expand Down Expand Up @@ -2074,7 +2071,7 @@ const PARSE_REGEX_LARGE_INPUT: &str = concat!(
" HTTP/1.1\" 200 20574 \"-\" \"Mozilla/5.0 (compatible; Datadog Agent/7.x; +https://docs.datadoghq.com/agent/)\"",
" 0.042 upstream_addr=10.0.1.55:8080 upstream_status=200 request_id=req-abc123def456",
);
const PARSE_REGEX_SINGLE_MATCH_PATTERN: &str = "(?P<number>.*?) group";
const PARSE_REGEX_SINGLE_MATCH_PATTERN: &str = r"(?P<number>[^\s-]*?) group";

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep benchmarking the documented parse_regex pattern

This shared constant also feeds the parse_regex/single_match benchmark, while the documented example and unit test still use (?P<number>.*?) group in src/stdlib/parse_regex.rs. That old pattern already returns {"number":"first"} for the single-match case, so changing it here only alters the workload being measured (lazy wildcard vs. negated character class) and makes future parse_regex timings incomparable with previous runs; use a separate pattern for the parse_regex_all case if that benchmark needs different captures.

Useful? React with 👍 / 👎.

const PARSE_REGEX_LARGE_INPUT_SMALL_CAPTURES_PATTERN: &str =
r#"^(?P<host>[\w\.]+) - [\w]+ [\d]+ \[(?P<timestamp>[^\]]+)\]"#;
const PARSE_REGEX_LARGE_INPUT_PATTERN: &str = r#"^(?P<host>[\w\.]+) - (?P<user>[\w]+) (?P<bytes_in>[\d]+) \[(?P<timestamp>[^\]]+)\] "(?P<method>[\w]+) (?P<path>\S+) HTTP/[\d\.]+" (?P<status>[\d]+) (?P<bytes_out>[\d]+)"#;
Expand Down Expand Up @@ -2797,13 +2794,13 @@ bench_function! {
sieve => vrl::stdlib::Sieve;

regex {
args: func_args![value: value!("test123%456.فوائد.net."), permitted_characters: regex::Regex::new("[a-z.0-9]").unwrap(), replace_single: "X", replace_repeated: "<REMOVED>"],
args: func_args![value: value!("test123%456.فوائد.net."), permitted_characters: Regex::new("[a-z.0-9]").unwrap(), replace_single: "X", replace_repeated: "<REMOVED>"],
want: Ok(value!("test123X456.<REMOVED>.net.")),
}

string {
args: func_args![value: value!("37ccx6a5uf52a7dv2hfxgpmltji09x6xkg0zv6yxsoi4kqs9atmjh7k50dcjb7z.فوائد.net."), permitted_characters: "acx.", replace_single: "0", replace_repeated: "<REMOVED>"],
want: Ok(value!("<REMOVED>ccx0a<REMOVED>a<REMOVED>x<REMOVED>x0x<Removed>x<Removed>a<Removed>c<REMOVED>.<REMOVED>.<REMOVED>.")),
args: func_args![value: value!("37ccx6a5uf52a7dv2hfxgpmltji09x6xkg0zv6yxsoi4kqs9atmjh7k50dcjb7z.فوائد.net."), permitted_characters: Regex::new(r"[acx\.]").unwrap(), replace_single: "0", replace_repeated: "<REMOVED>"],
want: Ok(value!("<REMOVED>ccx0a<REMOVED>a<REMOVED>x<REMOVED>x0x<REMOVED>x<REMOVED>a<REMOVED>c<REMOVED>.<REMOVED>.<REMOVED>.")),
}
}

Expand Down
11 changes: 11 additions & 0 deletions src/compiler/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ macro_rules! bench_function {
let tz = $crate::compiler::TimeZone::Named(chrono_tz::Tz::UTC);
let mut ctx = $crate::compiler::Context::new(&mut target, &mut runtime_state, &tz);

// Check that the function returns the expected result before benchmarking starts
let got = expression.resolve(&mut ctx).map_err(|e| e.to_string());
assert_eq!(got, want);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Use tolerant checks for benchmark float results

With this unconditional assert_eq! in the benchmark harness, release cargo bench --features="default test" --bench stdlib -- --test now aborts before measuring later functions when a benchmark returns a floating value that differs by one f64 ulp from the literal expectation; I ran that command and it panicked at vrl_stdlib/functions/shannon_entropy/default with 3.7369879306358205 vs 3.736987930635821. The previous check was only a debug_assert_eq!, so exact equality here makes the stdlib benchmark suite fail in normal bench runs unless floating results are compared with tolerance or handled specially.

Useful? React with 👍 / 👎.


b.iter(|| {
let got = expression.resolve(&mut ctx).map_err(|e| e.to_string());
debug_assert_eq!(got, want);
Expand All @@ -83,6 +87,13 @@ macro_rules! bench_query_function {
let tz = $crate::compiler::TimeZone::Named(chrono_tz::Tz::UTC);
let event: $crate::value::Value = $event.into();

// Check that the function returns the expected result before benchmarking starts
let mut runtime_state = $crate::compiler::state::RuntimeState::default();
let mut target = event.clone();
let mut ctx = $crate::compiler::Context::new(&mut target, &mut runtime_state, &tz);
let got = expression.resolve(&mut ctx).map_err(|e| e.to_string());
assert_eq!(got, want);

b.iter_batched(|| {
let mut runtime_state = $crate::compiler::state::RuntimeState::default();
let mut target = event.clone();
Expand Down
1 change: 0 additions & 1 deletion src/stdlib/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ mod tests {
mixed_ascii_unicode {
args: func_args![value: "ab你好",
chunk_size: 4,
utf8: false
],
want: Ok(value!([b"ab\xe4\xbd", b"\xa0\xe5\xa5\xbd"])),
tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
Expand Down