From b7fe4eba55903934a62bb079c13f31b148eee77b Mon Sep 17 00:00:00 2001 From: Sergey Petrov Date: Mon, 18 May 2026 13:25:22 +0300 Subject: [PATCH 1/4] fix(litellm): detect GPT-5 family when provider prefix is in model name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPT-5 codex models only accept `temperature=1`. The handler converted `temperature` into `reasoning_effort` only when the model string started with `gpt-5`, but users frequently set the full provider-qualified name in their config (e.g. `openai/gpt-5.1-codex-max`). That prefix made the `startswith('gpt-5')` check fail, so `temperature=0.2` was sent and litellm rejected the request with `UnsupportedParamsError`. The primary model then always fell back to the secondary one — silently downgrading quality and doubling per-call latency/cost. Normalize the model name by stripping the `openai/` / `azure/` prefix before the GPT-5 check, and rebuild the routed name with the original provider so Azure routing is preserved. Adds regression tests for `openai/gpt-5*` (including `codex-max`) and for the `_thinking` suffix together with a prefix. --- .../algo/ai_handlers/litellm_ai_handler.py | 11 +++- .../unittest/test_litellm_reasoning_effort.py | 59 +++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index a6e79d7a07..7da1d2c996 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -431,7 +431,12 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: {"type": "image_url", "image_url": {"url": img_path}}] thinking_kwargs_gpt5 = None - if model.startswith('gpt-5'): + # Detect GPT-5 family regardless of provider prefix users may put in config + # (e.g. "openai/gpt-5.1-codex-max"). Without this, prefixed model names skipped + # the reasoning_effort path and litellm rejected the request with + # UnsupportedParamsError for temperature=0.2. + model_base = model.removeprefix('openai/').removeprefix('azure/') + if model_base.startswith('gpt-5'): # Use configured reasoning_effort or default to MEDIUM config_effort = get_settings().config.reasoning_effort try: @@ -450,7 +455,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: "allowed_openai_params": ["reasoning_effort"], } get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model") - model = 'openai/'+model.replace('_thinking', '') # remove _thinking suffix + # Preserve azure/ routing if it was applied above; otherwise route via openai/ + provider_prefix = 'azure/' if self.azure else 'openai/' + model = provider_prefix + model_base.replace('_thinking', '') # remove _thinking suffix # Currently, some models do not support a separate system and user prompts diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py index 298fce640c..9ca7c0b90f 100644 --- a/tests/unittest/test_litellm_reasoning_effort.py +++ b/tests/unittest/test_litellm_reasoning_effort.py @@ -682,3 +682,62 @@ async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger): # Should have reasoning_effort call_kwargs = mock_completion.call_args[1] assert call_kwargs["reasoning_effort"] == "medium" + + # ========== Group 8: Provider Prefix Handling ========== + + @pytest.mark.asyncio + async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypatch, mock_logger): + """Regression: model="openai/gpt-5*" must enter the GPT-5 reasoning_effort path. + + Before the fix, startswith('gpt-5') was False for prefixed names, so the handler + sent temperature=0.2 to litellm and the request failed with UnsupportedParamsError + for gpt-5 codex models. + """ + fake_settings = create_mock_settings("medium") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + prefixed_models = [ + "openai/gpt-5", + "openai/gpt-5.1-codex", + "openai/gpt-5.1-codex-max", + "openai/gpt-5.4-mini", + ] + + for model in prefixed_models: + with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler.chat_completion( + model=model, + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + # GPT-5 path must trigger and drop temperature in favor of reasoning_effort + assert call_kwargs["reasoning_effort"] == "medium", f"failed for {model}" + assert "reasoning_effort" in call_kwargs["allowed_openai_params"], f"failed for {model}" + assert "temperature" not in call_kwargs, f"temperature leaked for {model}" + # Model name passed to litellm must keep the openai/ prefix exactly once + assert call_kwargs["model"] == model, f"model double-prefixed: {call_kwargs['model']}" + + @pytest.mark.asyncio + async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, mock_logger): + """Prefixed _thinking models must have the suffix removed without double-prefixing.""" + fake_settings = create_mock_settings("low") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler.chat_completion( + model="openai/gpt-5_thinking", + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["model"] == "openai/gpt-5" + assert call_kwargs["reasoning_effort"] == "low" From ba459fb3dc9ad643ef1c274375a4a87fccb0b587 Mon Sep 17 00:00:00 2001 From: Sergey Petrov Date: Mon, 18 May 2026 14:44:07 +0300 Subject: [PATCH 2/4] fix(litellm): preserve explicit provider prefix and dedupe stacked prefixes Address two routing issues raised by the Qodo review on the initial fix: 1. Stacked prefixes (Azure mode + provider-qualified config). With self.azure prepending "azure/" to a model that already had an "openai/" or "azure/" prefix in config, the single removeprefix() call left a residual provider segment, GPT-5 detection failed, and temperature was still sent (the original bug this PR fixes). 2. Explicit "azure/" rewritten to "openai/" when self.azure was false. The previous rebuild used "azure/ if self.azure else openai/", which ignored an explicit "azure/" the user had written in config and silently rerouted the request to OpenAI. Strip provider prefixes in a loop so any number of stacked segments is removed, and choose the rebuilt prefix by priority: Azure mode > explicit prefix in user config > "openai/" default. Capture the user's original model string before the azure auto-prepend so the explicit prefix is still visible at rebuild time. Tests added for: - explicit "azure/" preserved when self.azure is false - Azure mode with bare / openai/- / azure/- prefixed configs all producing exactly one "azure/" prefix --- .../algo/ai_handlers/litellm_ai_handler.py | 30 +++++++--- .../unittest/test_litellm_reasoning_effort.py | 57 +++++++++++++++++++ 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 7da1d2c996..386742f6cf 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -408,6 +408,10 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: try: resp, finish_reason = None, None deployment_id = self.deployment_id + # Capture the original model string so an explicit provider prefix in the + # user config (e.g. "azure/gpt-5...") can be preserved when the GPT-5 branch + # rebuilds the routed model name below. + user_model = model if self.azure: model = 'azure/' + model if 'claude' in model and not system: @@ -431,11 +435,14 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: {"type": "image_url", "image_url": {"url": img_path}}] thinking_kwargs_gpt5 = None - # Detect GPT-5 family regardless of provider prefix users may put in config - # (e.g. "openai/gpt-5.1-codex-max"). Without this, prefixed model names skipped - # the reasoning_effort path and litellm rejected the request with - # UnsupportedParamsError for temperature=0.2. - model_base = model.removeprefix('openai/').removeprefix('azure/') + # Detect GPT-5 family regardless of provider prefix(es) on the model name. + # Users sometimes put a provider prefix in config (e.g. "openai/gpt-5.1-codex-max"), + # and Azure mode auto-prepends "azure/", which together can produce stacked prefixes + # like "azure/openai/gpt-5...". Without normalization the GPT-5 path is skipped and + # litellm rejects the request with UnsupportedParamsError for temperature=0.2. + model_base = model + while model_base.startswith(('openai/', 'azure/')): + model_base = model_base.removeprefix('openai/').removeprefix('azure/') if model_base.startswith('gpt-5'): # Use configured reasoning_effort or default to MEDIUM config_effort = get_settings().config.reasoning_effort @@ -455,8 +462,17 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: "allowed_openai_params": ["reasoning_effort"], } get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model") - # Preserve azure/ routing if it was applied above; otherwise route via openai/ - provider_prefix = 'azure/' if self.azure else 'openai/' + # Routing priority: Azure mode > explicit provider prefix in user config > openai/ + # default. This preserves an explicit "azure/" the user wrote in config even when + # self.azure is false, and avoids stacking when self.azure already added "azure/". + if self.azure: + provider_prefix = 'azure/' + elif user_model.startswith('azure/'): + provider_prefix = 'azure/' + elif user_model.startswith('openai/'): + provider_prefix = 'openai/' + else: + provider_prefix = 'openai/' model = provider_prefix + model_base.replace('_thinking', '') # remove _thinking suffix diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py index 9ca7c0b90f..acf4e3128d 100644 --- a/tests/unittest/test_litellm_reasoning_effort.py +++ b/tests/unittest/test_litellm_reasoning_effort.py @@ -741,3 +741,60 @@ async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, call_kwargs = mock_completion.call_args[1] assert call_kwargs["model"] == "openai/gpt-5" assert call_kwargs["reasoning_effort"] == "low" + + @pytest.mark.asyncio + async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypatch, mock_logger): + """Explicit `azure/` prefix in user config must be preserved (not silently rewritten to openai/).""" + fake_settings = create_mock_settings("medium") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + # self.azure is False by default (no Azure AD creds in test env) + await handler.chat_completion( + model="azure/gpt-5.1-codex-max", + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["reasoning_effort"] == "medium" + assert "temperature" not in call_kwargs + # Provider prefix from user config must be preserved verbatim + assert call_kwargs["model"] == "azure/gpt-5.1-codex-max" + + @pytest.mark.asyncio + async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, mock_logger): + """Azure mode must produce exactly one `azure/` prefix, even if user config also has a prefix.""" + fake_settings = create_mock_settings("medium") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + # Cases: bare name, openai/-prefixed config, azure/-prefixed config — all in azure mode + cases = [ + ("gpt-5.1-codex", "azure/gpt-5.1-codex"), + ("openai/gpt-5.1-codex", "azure/gpt-5.1-codex"), + ("azure/gpt-5.1-codex", "azure/gpt-5.1-codex"), + ] + + for input_model, expected in cases: + with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + handler.azure = True # simulate Azure-mode handler + await handler.chat_completion( + model=input_model, + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + # GPT-5 path must trigger + assert call_kwargs["reasoning_effort"] == "medium", f"failed for {input_model}" + assert "temperature" not in call_kwargs, f"temperature leaked for {input_model}" + # Exactly one azure/ prefix, no stacked/duplicated provider segments + assert call_kwargs["model"] == expected, ( + f"wrong routing for {input_model}: got {call_kwargs['model']}, expected {expected}" + ) From 66026ffc372b26e0ff853a991a32154b8ffdab7f Mon Sep 17 00:00:00 2001 From: Sergey Petrov Date: Mon, 18 May 2026 14:44:14 +0300 Subject: [PATCH 3/4] chore(tests): wrap long with-patch lines in GPT-5 prefix tests Four `with patch(...)` statements in the GPT-5 provider-prefix tests exceeded the repo's 120-character line limit (ruff line-length=120 in pyproject.toml). Wrap them onto multiple lines so the test file does not regress lint compliance for the newly added cases. --- .../unittest/test_litellm_reasoning_effort.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py index acf4e3128d..2175086dae 100644 --- a/tests/unittest/test_litellm_reasoning_effort.py +++ b/tests/unittest/test_litellm_reasoning_effort.py @@ -704,7 +704,10 @@ async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypat ] for model in prefixed_models: - with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: mock_completion.return_value = create_mock_acompletion_response() handler = LiteLLMAIHandler() @@ -728,7 +731,10 @@ async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, fake_settings = create_mock_settings("low") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) - with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: mock_completion.return_value = create_mock_acompletion_response() handler = LiteLLMAIHandler() @@ -748,7 +754,10 @@ async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypat fake_settings = create_mock_settings("medium") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) - with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: mock_completion.return_value = create_mock_acompletion_response() handler = LiteLLMAIHandler() @@ -779,7 +788,10 @@ async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, moc ] for input_model, expected in cases: - with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion: + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: mock_completion.return_value = create_mock_acompletion_response() handler = LiteLLMAIHandler() From 79513bafbacd08e58fa685e09861fb0f0c795562 Mon Sep 17 00:00:00 2001 From: Sergey Petrov Date: Mon, 18 May 2026 15:00:37 +0300 Subject: [PATCH 4/4] test(litellm): explicitly isolate GPT-5 prefix tests from runner env Address Qodo review feedback: the added Group 8 tests instantiate LiteLLMAIHandler() directly, and its __init__ branches on environment variables such as AWS_USE_IMDS, OPENAI_API_KEY, and the AWS_* set. With those vars potentially leaking from the runner environment, the tests could behave differently across machines or CI runners (e.g. attempt boto3 import when AWS_USE_IMDS is set). Explicitly delete the relevant env vars via monkeypatch.delenv() at the start of each new test, so the constructor takes a deterministic path regardless of where the suite runs. --- tests/unittest/test_litellm_reasoning_effort.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py index 2175086dae..3c4905ef01 100644 --- a/tests/unittest/test_litellm_reasoning_effort.py +++ b/tests/unittest/test_litellm_reasoning_effort.py @@ -695,6 +695,10 @@ async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypat """ fake_settings = create_mock_settings("medium") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) prefixed_models = [ "openai/gpt-5", @@ -730,6 +734,10 @@ async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, """Prefixed _thinking models must have the suffix removed without double-prefixing.""" fake_settings = create_mock_settings("low") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) with patch( 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', @@ -753,6 +761,10 @@ async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypat """Explicit `azure/` prefix in user config must be preserved (not silently rewritten to openai/).""" fake_settings = create_mock_settings("medium") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) with patch( 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', @@ -779,6 +791,10 @@ async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, moc """Azure mode must produce exactly one `azure/` prefix, even if user config also has a prefix.""" fake_settings = create_mock_settings("medium") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) # Cases: bare name, openai/-prefixed config, azure/-prefixed config — all in azure mode cases = [