Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tests/observability/metrics/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
KUBEVIRT_VMI_PHASE_TRANSITION_TIME_FROM_DELETION_SECONDS_COUNT_SUCCEEDED,
KUBEVIRT_VMI_PHASE_TRANSITION_TIME_FROM_DELETION_SECONDS_SUM_SUCCEEDED,
KUBEVIRT_VMI_STATUS_ADDRESSES,
KUBEVIRT_VMI_SYNC_TOTAL,
Comment thread
OhadRevah marked this conversation as resolved.
Comment thread
OhadRevah marked this conversation as resolved.
KUBEVIRT_VNC_ACTIVE_CONNECTIONS_BY_VMI,
SUM_KUBEVIRT_VMI_PHASE_TRANSITION_TIME_FROM_DELETION_SECONDS_BUCKET_SUCCEEDED,
)
Expand All @@ -40,6 +41,7 @@
get_vmi_guest_os_kernel_release_info_metric_from_vm,
metric_result_output_dict_by_mountpoint,
network_packets_received,
validate_vmi_sync_total_reported_and_positive,
vnic_info_from_vm_or_vmi,
)
from tests.observability.utils import validate_metrics_value
Expand Down Expand Up @@ -676,6 +678,18 @@ def expected_cpu_affinity_metric_value(admin_client, vm_with_cpu_spec):
return str(cpu_count_from_vm_node * cpu_count_from_vm)


@pytest.fixture(scope="class")
def initial_vmi_sync_total_values(prometheus, vm_for_migration_metrics_test):
    """Record the current kubevirt_vmi_sync_total value of every reporting pod.

    Waits (via validate_vmi_sync_total_reported_and_positive) until the metric
    is reported for the VM, then snapshots the values. The snapshot is consumed
    as the baseline by the post-migration comparison test.

    Args:
        prometheus: Prometheus client fixture.
        vm_for_migration_metrics_test: VM whose name is substituted into the query.

    Returns:
        dict: Mapping of the "pod" label of each result to its value as a float.
    """
    baseline_samples = validate_vmi_sync_total_reported_and_positive(
        prometheus=prometheus,
        metric_query=KUBEVIRT_VMI_SYNC_TOTAL.format(vm_name=vm_for_migration_metrics_test.name),
    )
    # Each result carries its value as [timestamp, "<number as string>"]; keep only the number.
    return dict((entry["metric"]["pod"], float(entry["value"][1])) for entry in baseline_samples)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment thread
coderabbitai[bot] marked this conversation as resolved.


@pytest.fixture(scope="class")
def deleted_vmi_sync_total_vm(vm_for_migration_metrics_test):
    """Delete the migration-metrics test VM, passing wait=True to the delete call.

    The fixture yields no value; tests request it through
    ``pytest.mark.usefixtures`` purely for the deletion side effect.

    Args:
        vm_for_migration_metrics_test: VM object to delete.
    """
    vm_to_remove = vm_for_migration_metrics_test
    vm_to_remove.delete(wait=True)
Comment thread
coderabbitai[bot] marked this conversation as resolved.


@pytest.fixture(scope="class")
def vm_for_nad_swap_test(
unprivileged_client,
Expand Down
1 change: 1 addition & 0 deletions tests/observability/metrics/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@
]

# PromQL selector templates with a {vm_name} placeholder, meant to be rendered with
# str.format(vm_name=...). The doubled braces ("{{" / "}}") are str.format escapes that
# produce the literal "{" / "}" surrounding the label matcher in the final query.
KUBEVIRT_VMI_NODE_CPU_AFFINITY = "kubevirt_vmi_node_cpu_affinity{{kubernetes_vmi_label_kubevirt_io_domain='{vm_name}'}}"
KUBEVIRT_VMI_SYNC_TOTAL = "kubevirt_vmi_sync_total{{name='{vm_name}'}}"
Comment thread
coderabbitai[bot] marked this conversation as resolved.
29 changes: 24 additions & 5 deletions tests/observability/metrics/test_vms_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@
KUBEVIRT_VM_DISK_ALLOCATED_SIZE_BYTES,
KUBEVIRT_VMI_PHASE_TRANSITION_TIME_FROM_DELETION_SECONDS_COUNT_SUCCEEDED,
KUBEVIRT_VMI_PHASE_TRANSITION_TIME_FROM_DELETION_SECONDS_SUM_SUCCEEDED,
KUBEVIRT_VMI_SYNC_TOTAL,
KUBEVIRT_VNC_ACTIVE_CONNECTIONS_BY_VMI,
SUM_KUBEVIRT_VMI_PHASE_TRANSITION_TIME_FROM_DELETION_SECONDS_BUCKET_SUCCEEDED,
)
from tests.observability.metrics.utils import (
compare_metric_file_system_values_with_vm_file_system_values,
get_pvc_size_bytes,
timestamp_to_seconds,
validate_metric_value_cleared,
validate_metric_value_greater_than_initial_value,
validate_vmi_sync_total_after_migration,
validate_vmi_sync_total_reported_and_positive,
validate_vnic_info,
)
from tests.observability.utils import validate_metrics_value
Expand Down Expand Up @@ -566,10 +570,8 @@ class TestVmiSyncTotal:
- Prometheus access configured
"""

__test__ = False

@pytest.mark.polarion("CNV-16271")
def test_kubevirt_vmi_sync_total(self):
def test_kubevirt_vmi_sync_total(self, prometheus, vm_for_migration_metrics_test):
"""
Test that kubevirt_vmi_sync_total metric is reported by both
virt-controller and virt-handler after a VM starts.
Expand All @@ -582,9 +584,16 @@ def test_kubevirt_vmi_sync_total(self):
- Two metric entries are returned — one from virt-controller
and one from virt-handler — each with a value greater than 0
"""
validate_vmi_sync_total_reported_and_positive(
prometheus=prometheus,
metric_query=KUBEVIRT_VMI_SYNC_TOTAL.format(vm_name=vm_for_migration_metrics_test.name),
)

@pytest.mark.polarion("CNV-16272")
def test_kubevirt_vmi_sync_total_increases_after_migration(self):
@pytest.mark.usefixtures("migration_succeeded_scope_class")
def test_kubevirt_vmi_sync_total_increases_after_migration(
self, prometheus, initial_vmi_sync_total_values, vm_for_migration_metrics_test
):
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"""
Test that kubevirt_vmi_sync_total metric value increases after
a VM live migration.
Expand All @@ -602,9 +611,15 @@ def test_kubevirt_vmi_sync_total_increases_after_migration(self):
- Metric values from both virt-controller and virt-handler
are greater than the values recorded before migration
"""
validate_vmi_sync_total_after_migration(
prometheus=prometheus,
metric_query=KUBEVIRT_VMI_SYNC_TOTAL.format(vm_name=vm_for_migration_metrics_test.name),
initial_values=initial_vmi_sync_total_values,
)

@pytest.mark.polarion("CNV-16273")
def test_kubevirt_vmi_sync_total_cleared_after_vm_deletion(self):
@pytest.mark.usefixtures("deleted_vmi_sync_total_vm")
def test_kubevirt_vmi_sync_total_cleared_after_vm_deletion(self, prometheus, vm_for_migration_metrics_test):
"""
Test that kubevirt_vmi_sync_total metric entry is removed
after the VM is deleted.
Expand All @@ -620,3 +635,7 @@ def test_kubevirt_vmi_sync_total_cleared_after_vm_deletion(self):
Expected:
- Metric value is None
"""
validate_metric_value_cleared(
prometheus=prometheus,
metric_name=KUBEVIRT_VMI_SYNC_TOTAL.format(vm_name=vm_for_migration_metrics_test.name),
)
117 changes: 116 additions & 1 deletion tests/observability/metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
)
from utilities.constants import Images
from utilities.constants.aaq import NODE_STR
from utilities.constants.components import VIRT_HANDLER
from utilities.constants.components import (
VIRT_CONTROLLER,
VIRT_HANDLER,
)
from utilities.constants.images import OS_FLAVOR_WINDOWS
from utilities.constants.storage import (
CAPACITY,
Expand Down Expand Up @@ -815,3 +818,115 @@ def _get_metric_values():
return metric_sample

raise TimeoutError("Timed out waiting for Prometheus metrics to match expected values.")


def validate_vmi_sync_total_reported_and_positive(
    prometheus: Prometheus,
    metric_query: str,
) -> list[dict[str, str]]:
    """Wait for kubevirt_vmi_sync_total to be reported, positively, by both components.

    The query is re-run (sleep=TIMEOUT_15SEC, wait_timeout=TIMEOUT_4MIN) until a
    single sample contains at least two results, at least one whose "pod" label
    starts with VIRT_CONTROLLER, at least one whose "pod" label starts with
    VIRT_HANDLER, and every result value is greater than 0.

    Args:
        prometheus: Prometheus client instance; its ``query_sampler`` is polled.
        metric_query: PromQL query for kubevirt_vmi_sync_total.

    Returns:
        The first sample (list of Prometheus result dicts) satisfying all conditions.

    Raises:
        TimeoutExpiredError: Re-raised after logging the last sample seen when no
            sample satisfies the conditions before the sampler times out.
    """
    sampler = TimeoutSampler(
        wait_timeout=TIMEOUT_4MIN,
        sleep=TIMEOUT_15SEC,
        func=prometheus.query_sampler,
        query=metric_query,
    )
    # Kept outside the loop so the timeout handler can log the last sample observed.
    sample = None
    try:
        for sample in sampler:
            # Fewer than two results cannot cover both virt-controller and virt-handler.
            if sample and len(sample) >= 2:
                reporting_pods = set()
                for entry in sample:
                    reporting_pods.add(entry["metric"]["pod"])
                controller_seen = any(name.startswith(VIRT_CONTROLLER) for name in reporting_pods)
                handler_seen = any(name.startswith(VIRT_HANDLER) for name in reporting_pods)
                values_positive = all(float(entry["value"][1]) > 0 for entry in sample)
                if not (controller_seen and handler_seen and values_positive):
                    continue
                return sample
    except TimeoutExpiredError:
        LOGGER.error(f"Expected entries from both virt-controller and virt-handler, got: {sample}")
        raise
    # Only reached if the sampler stops iterating without raising.
    return []


def validate_vmi_sync_total_after_migration(
    prometheus: Prometheus,
    metric_query: str,
    initial_values: dict[str, float],
) -> None:
    """Wait for kubevirt_vmi_sync_total to show the expected post-migration pattern.

    The query is re-run (sleep=TIMEOUT_15SEC, wait_timeout=TIMEOUT_4MIN) until one
    sample satisfies both conditions:

    - every pod in ``initial_values`` whose name starts with VIRT_CONTROLLER is
      still reported and its value is greater than its initial value;
    - at least one pod whose name starts with VIRT_HANDLER, and which is absent
      from ``initial_values``, reports a value greater than 0.

    Args:
        prometheus: Prometheus client instance; its ``query_sampler`` is polled.
        metric_query: PromQL query for kubevirt_vmi_sync_total.
        initial_values: Pod-to-value mapping captured before migration.

    Raises:
        TimeoutExpiredError: Re-raised after logging the initial and last observed
            values when the pattern is not seen before the sampler times out.
    """
    sampler = TimeoutSampler(
        wait_timeout=TIMEOUT_4MIN,
        sleep=TIMEOUT_15SEC,
        func=prometheus.query_sampler,
        query=metric_query,
    )
    # Last non-empty reading, retained for the timeout log message.
    current_values = None
    try:
        for sample in sampler:
            if not sample:
                continue
            current_values = dict((entry["metric"]["pod"], float(entry["value"][1])) for entry in sample)
            controller_pods = [pod for pod in initial_values if pod.startswith(VIRT_CONTROLLER)]
            controllers_advanced = all(
                pod in current_values and current_values[pod] > initial_values[pod] for pod in controller_pods
            )
            # A handler pod that was not part of the baseline reporting a positive value.
            fresh_handler_reporting = any(
                value > 0
                for pod, value in current_values.items()
                if pod.startswith(VIRT_HANDLER) and pod not in initial_values
            )
            if controllers_advanced and fresh_handler_reporting:
                return
    except TimeoutExpiredError:
        LOGGER.error(f"Post-migration validation failed. Initial: {initial_values}, current: {current_values}")
        raise


def validate_metric_value_cleared(
    prometheus: Prometheus,
    metric_name: str,
    timeout: int = TIMEOUT_4MIN,
) -> None:
    """Polls until the metric returns no samples or all values are zero.

    Args:
        prometheus: Prometheus client instance; its ``query_sampler`` is polled.
        metric_name: PromQL query for the metric to check.
        timeout: Maximum wait time in seconds.

    Raises:
        TimeoutExpiredError: If the metric still has non-zero values after timeout.
    """
    samples = TimeoutSampler(
        wait_timeout=timeout,
        sleep=TIMEOUT_15SEC,
        func=prometheus.query_sampler,
        query=metric_name,
    )
    # Kept outside the loop so the timeout handler can log the last sample observed.
    sample = None
    try:
        for sample in samples:
            # Sample values arrive as strings. Compare them numerically (as the other
            # validators in this module do) so every textual form of zero counts as
            # cleared, not only the exact literal "0".
            if not sample or all(float(result["value"][1]) == 0 for result in sample):
                return
    except TimeoutExpiredError:
        LOGGER.error(f"Metric {metric_name} still has non-zero values: {sample}")
        raise