diff --git a/benches/stdlib.rs b/benches/stdlib.rs index 85d3eac6b0..ea160f2a1e 100644 --- a/benches/stdlib.rs +++ b/benches/stdlib.rs @@ -31,7 +31,8 @@ criterion_group!( decode_percent, decode_punycode, decrypt, - dns_lookup, + // dns_lookup makes network calls, so it doesn't make sense to benchmark it + //dns_lookup, del, decrypt_ip, downcase, @@ -141,7 +142,8 @@ criterion_group!( redact, remove, replace, - reverse_dns, + // reverse_dns makes network calls, so it doesn't make sense to benchmark it + //reverse_dns, round, seahash, set, @@ -797,13 +799,13 @@ bench_function! { ip_ntop => vrl::stdlib::IpNtop; ipv4 { - args: func_args![value: "1.2.3.4"], - want: Ok(value!("\x01\x02\x03\x04")), + args: func_args![value: "\x01\x02\x03\x04"], + want: Ok(value!("1.2.3.4")), } ipv6 { - args: func_args![value: "102:304:506:708:90a:b0c:d0e:f10"], - want: Ok(value!("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10")), + args: func_args![value: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"], + want: Ok(value!("102:304:506:708:90a:b0c:d0e:f10")), } } @@ -811,13 +813,13 @@ bench_function! { ip_pton => vrl::stdlib::IpPton; ipv4 { - args: func_args![value: "\x01\x02\x03\x04"], - want: Ok(value!("1.2.3.4")), + args: func_args![value: "1.2.3.4"], + want: Ok(value!("\x01\x02\x03\x04")), } ipv6 { - args: func_args![value: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"], - want: Ok(value!("102:304:506:708:90a:b0c:d0e:f10")), + args: func_args![value: "102:304:506:708:90a:b0c:d0e:f10"], + want: Ok(value!("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10")), } } @@ -963,7 +965,7 @@ bench_function! { not_string { args: func_args![value: 42], - want: Ok(false), + want: Err("expected string, got integer"), } ipv4 { @@ -987,7 +989,7 @@ bench_function! { not_string { args: func_args![value: 42], - want: Ok(false), + want: Err("expected string, got integer"), } ipv4 { @@ -1020,7 +1022,7 @@ bench_function! 
{ } exact_variant { - args: func_args![value: r#"{"key": "value""#, variant: "object"], + args: func_args![value: r#"{"key": "value"}"#, variant: "object"], want: Ok(true), } } @@ -1263,7 +1265,7 @@ bench_function! { } equals_facet { - args: func_args![value: value!({"custom": {"z": 1}}), query: "@z:1"], + args: func_args![value: value!({"z": 1}), query: "@z:1"], want: Ok(true), } @@ -1278,7 +1280,7 @@ bench_function! { } wildcard_suffix_facet { - args: func_args![value: value!({"custom": {"a": "vector"}}), query: "@a:vec*"], + args: func_args![value: value!({"a": "vector"}), query: "@a:vec*"], want: Ok(true), } @@ -1323,7 +1325,7 @@ bench_function! { } kitchen_sink_2 { - args: func_args![value: value!({"tags": ["c:that", "d:the_other"], "custom": {"b": "testing", "e": 3}}), query: "host:this OR ((@b:test* AND c:that) AND d:the_other @e:[1 TO 5])"], + args: func_args![value: value!({"tags": ["c:that", "d:the_other"], "b": "testing", "e": 3}), query: "host:this OR ((@b:test* AND c:that) AND d:the_other @e:[1 TO 5])"], want: Ok(true), } } @@ -1467,6 +1469,7 @@ bench_function! { "target_status_code_list": ["200"], "timestamp": "2018-07-02T22:23:00.186641Z", "trace_id": "Root=1-58337262-36d228ad5d99923122bbe354", + "traceability_id": null, "type": "http", "user_agent": "curl/7.46.0" })), @@ -1528,7 +1531,7 @@ bench_function! { format: "version interface_id account_id vpc_id subnet_id instance_id srcaddr dstaddr srcport dstport protocol tcp_flags type pkt_srcaddr pkt_dstaddr action log_status", ], want: Ok(value!({ - "account_id": 123456789010i64, + "account_id": "123456789010", "action": "ACCEPT", "dstaddr": "10.40.2.236", "dstport": 80, @@ -1964,7 +1967,7 @@ bench_function! 
{ combined { args: func_args![ - value: r#"172.17.0.1 alice - [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75""#, + value: r#"172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75""#, format: "combined", ], want: Ok(value!({ @@ -1972,9 +1975,6 @@ bench_function! { "user": "alice", "timestamp": (DateTime::parse_from_rfc3339("2021-04-01T12:02:31Z").unwrap().with_timezone(&Utc)), "request": "POST /not-found HTTP/1.1", - "method": "POST", - "path": "/not-found", - "protocol": "HTTP/1.1", "status": 404, "size": 153, "referer": "http://localhost/somewhere", @@ -1992,9 +1992,6 @@ bench_function! { "remote_addr": "0.0.0.0", "timestamp": (DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().with_timezone(&Utc)), "request": "GET /some/path HTTP/2.0", - "method": "GET", - "path": "/some/path", - "protocol": "HTTP/2.0", "status": 200, "body_bytes_size": 12312, "http_referer": "https://10.0.0.1/some/referer", @@ -2074,7 +2071,7 @@ const PARSE_REGEX_LARGE_INPUT: &str = concat!( " HTTP/1.1\" 200 20574 \"-\" \"Mozilla/5.0 (compatible; Datadog Agent/7.x; +https://docs.datadoghq.com/agent/)\"", " 0.042 upstream_addr=10.0.1.55:8080 upstream_status=200 request_id=req-abc123def456", ); -const PARSE_REGEX_SINGLE_MATCH_PATTERN: &str = "(?P.*?) group"; +const PARSE_REGEX_SINGLE_MATCH_PATTERN: &str = r"(?P[^\s-]*?) group"; const PARSE_REGEX_LARGE_INPUT_SMALL_CAPTURES_PATTERN: &str = r#"^(?P[\w\.]+) - [\w]+ [\d]+ \[(?P[^\]]+)\]"#; const PARSE_REGEX_LARGE_INPUT_PATTERN: &str = r#"^(?P[\w\.]+) - (?P[\w]+) (?P[\d]+) \[(?P[^\]]+)\] "(?P[\w]+) (?P\S+) HTTP/[\d\.]+" (?P[\d]+) (?P[\d]+)"#; @@ -2797,13 +2794,13 @@ bench_function! 
{ sieve => vrl::stdlib::Sieve; regex { - args: func_args![value: value!("test123%456.فوائد.net."), permitted_characters: regex::Regex::new("[a-z.0-9]").unwrap(), replace_single: "X", replace_repeated: ""], + args: func_args![value: value!("test123%456.فوائد.net."), permitted_characters: Regex::new("[a-z.0-9]").unwrap(), replace_single: "X", replace_repeated: ""], want: Ok(value!("test123X456..net.")), } string { - args: func_args![value: value!("37ccx6a5uf52a7dv2hfxgpmltji09x6xkg0zv6yxsoi4kqs9atmjh7k50dcjb7z.فوائد.net."), permitted_characters: "acx.", replace_single: "0", replace_repeated: ""], - want: Ok(value!("ccx0aaxx0xxac...")), + args: func_args![value: value!("37ccx6a5uf52a7dv2hfxgpmltji09x6xkg0zv6yxsoi4kqs9atmjh7k50dcjb7z.فوائد.net."), permitted_characters: Regex::new(r"[acx\.]").unwrap(), replace_single: "0", replace_repeated: ""], + want: Ok(value!("ccx0aaxx0xxac...")), } } diff --git a/src/compiler/test_util.rs b/src/compiler/test_util.rs index b49e403459..cab5a46425 100644 --- a/src/compiler/test_util.rs +++ b/src/compiler/test_util.rs @@ -57,6 +57,10 @@ macro_rules! bench_function { let tz = $crate::compiler::TimeZone::Named(chrono_tz::Tz::UTC); let mut ctx = $crate::compiler::Context::new(&mut target, &mut runtime_state, &tz); + // Checks if function returns correct result before starting benchmarking + let got = expression.resolve(&mut ctx).map_err(|e| e.to_string()); + assert_eq!(got, want); + b.iter(|| { let got = expression.resolve(&mut ctx).map_err(|e| e.to_string()); debug_assert_eq!(got, want); @@ -83,6 +87,13 @@ macro_rules! 
bench_query_function { let tz = $crate::compiler::TimeZone::Named(chrono_tz::Tz::UTC); let event: $crate::value::Value = $event.into(); + // Checks if function returns correct result before starting benchmarking + let mut runtime_state = $crate::compiler::state::RuntimeState::default(); + let mut target = event.clone(); + let mut ctx = $crate::compiler::Context::new(&mut target, &mut runtime_state, &tz); + let got = expression.resolve(&mut ctx).map_err(|e| e.to_string()); + assert_eq!(got, want); + b.iter_batched(|| { let mut runtime_state = $crate::compiler::state::RuntimeState::default(); let mut target = event.clone(); diff --git a/src/stdlib/chunks.rs b/src/stdlib/chunks.rs index 43d1e7f44b..a1bf3f9cf6 100644 --- a/src/stdlib/chunks.rs +++ b/src/stdlib/chunks.rs @@ -157,7 +157,6 @@ mod tests { mixed_ascii_unicode { args: func_args![value: "ab你好", chunk_size: 4, - utf8: false ], want: Ok(value!([b"ab\xe4\xbd", b"\xa0\xe5\xa5\xbd"])), tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),