diff --git a/tests/install_upgrade_operators/crypto_policy/utils.py b/tests/install_upgrade_operators/crypto_policy/utils.py index 018feb5088..8f0197768a 100644 --- a/tests/install_upgrade_operators/crypto_policy/utils.py +++ b/tests/install_upgrade_operators/crypto_policy/utils.py @@ -289,6 +289,33 @@ def update_apiserver_crypto_policy( hco_namespace=hco_namespace, list_dependent_crs_to_check=MANAGED_CRS_LIST, ) + _wait_for_hco_webhook_ready(admin_client=admin_client, hco_namespace=hco_namespace) + + +def _get_hco_resources(admin_client: DynamicClient, namespace_name: str) -> list: + return list(HyperConverged.get(client=admin_client, namespace=namespace_name)) + + +def _wait_for_hco_webhook_ready(admin_client: DynamicClient, hco_namespace: Resource) -> None: + """Waits for the HCO webhook service to become reachable. + + After APIServer TLS changes, the conversion webhook may briefly lose endpoints + even after cluster operators report stable. Reading the HyperConverged resource + exercises the conversion webhook, confirming it is functional before subsequent + HCO modifications. + """ + sampler = TimeoutSampler( + wait_timeout=TIMEOUT_2MIN, + sleep=10, + func=_get_hco_resources, + exceptions_dict={ApiException: []}, + admin_client=admin_client, + namespace_name=hco_namespace.name, + ) + for sample in sampler: + if sample: + LOGGER.info("HCO webhook service is ready.") + return def check_service_accepts_tls_version(utility_pods: list, node: Node, service: Resource, tls_version: str) -> bool: diff --git a/utilities/operator.py b/utilities/operator.py index 446a10fbd8..0ab8c467c5 100644 --- a/utilities/operator.py +++ b/utilities/operator.py @@ -5,6 +5,7 @@ from datetime import datetime from pprint import pformat +from kubernetes.client.exceptions import ApiException from kubernetes.dynamic import DynamicClient from kubernetes.dynamic.exceptions import ResourceNotFoundError from ocp_resources.catalog_source import CatalogSource @@ -31,6 +32,7 @@ TIMEOUT_10SEC, TIMEOUT_15MIN, TIMEOUT_20MIN, + TIMEOUT_30SEC, TIMEOUT_75MIN, ) from utilities.data_collector import collect_ocp_must_gather @@ -558,10 +560,10 @@ def cluster_with_icsp(): return len(icsp_list) > 0 -def get_cluster_operator_status_conditions(admin_client, operator_conditions=None): +def get_cluster_operator_status_conditions(admin_client, operator_conditions=None, request_timeout=TIMEOUT_30SEC): operator_conditions = operator_conditions or DEFAULT_RESOURCE_CONDITIONS cluster_operator_status = {} - for cluster_operator in list(ClusterOperator.get(client=admin_client)): + for cluster_operator in list(ClusterOperator.get(client=admin_client, _request_timeout=request_timeout)): operator_name = cluster_operator.name cluster_operator_status[operator_name] = {} for condition in cluster_operator.instance.get("status", {}).get("conditions", []): @@ -598,6 +600,7 @@ def wait_for_cluster_operator_stabilize(admin_client, wait_timeout=TIMEOUT_20MIN wait_timeout=wait_timeout, sleep=10, func=get_failed_cluster_operator, + exceptions_dict={ApiException: []}, admin_client=admin_client, ) consecutive_check = 0