From b7fe4eba55903934a62bb079c13f31b148eee77b Mon Sep 17 00:00:00 2001
From: Sergey Petrov <sergey.petrov@travelata.ru>
Date: Mon, 18 May 2026 13:25:22 +0300
Subject: [PATCH 1/4] fix(litellm): detect GPT-5 family when provider prefix is
 in model name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GPT-5 codex models only accept `temperature=1`. The handler converted
`temperature` into `reasoning_effort` only when the model string started
with `gpt-5`, but users frequently set the full provider-qualified name
in their config (e.g. `openai/gpt-5.1-codex-max`). That prefix made the
`startswith('gpt-5')` check fail, so `temperature=0.2` was sent and
litellm rejected the request with `UnsupportedParamsError`. The primary
model then always fell back to the secondary one — silently downgrading
quality and doubling per-call latency/cost.

Normalize the model name by stripping the `openai/` / `azure/` prefix
before the GPT-5 check, and rebuild the routed name with the original
provider so Azure routing is preserved.

Adds regression tests for `openai/gpt-5*` (including `codex-max`) and
for the `_thinking` suffix together with a prefix.
---
 .../algo/ai_handlers/litellm_ai_handler.py    | 11 +++-
 .../unittest/test_litellm_reasoning_effort.py | 59 +++++++++++++++++++
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index a6e79d7a07..7da1d2c996 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -431,7 +431,12 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                                               {"type": "image_url", "image_url": {"url": img_path}}]
 
                 thinking_kwargs_gpt5 = None
-                if model.startswith('gpt-5'):
+                # Detect GPT-5 family regardless of provider prefix users may put in config
+                # (e.g. "openai/gpt-5.1-codex-max"). Without this, prefixed model names skipped
+                # the reasoning_effort path and litellm rejected the request with
+                # UnsupportedParamsError for temperature=0.2.
+                model_base = model.removeprefix('openai/').removeprefix('azure/')
+                if model_base.startswith('gpt-5'):
                     # Use configured reasoning_effort or default to MEDIUM
                     config_effort = get_settings().config.reasoning_effort
                     try:
@@ -450,7 +455,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                         "allowed_openai_params": ["reasoning_effort"],
                     }
                     get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model")
-                    model = 'openai/'+model.replace('_thinking', '')  # remove _thinking suffix
+                    # Preserve azure/ routing if it was applied above; otherwise route via openai/
+                    provider_prefix = 'azure/' if self.azure else 'openai/'
+                    model = provider_prefix + model_base.replace('_thinking', '')  # remove _thinking suffix
 
 
                 # Currently, some models do not support a separate system and user prompts
diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py
index 298fce640c..9ca7c0b90f 100644
--- a/tests/unittest/test_litellm_reasoning_effort.py
+++ b/tests/unittest/test_litellm_reasoning_effort.py
@@ -682,3 +682,62 @@ async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger):
             # Should have reasoning_effort
             call_kwargs = mock_completion.call_args[1]
             assert call_kwargs["reasoning_effort"] == "medium"
+
+    # ========== Group 8: Provider Prefix Handling ==========
+
+    @pytest.mark.asyncio
+    async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypatch, mock_logger):
+        """Regression: model="openai/gpt-5*" must enter the GPT-5 reasoning_effort path.
+
+        Before the fix, startswith('gpt-5') was False for prefixed names, so the handler
+        sent temperature=0.2 to litellm and the request failed with UnsupportedParamsError
+        for gpt-5 codex models.
+        """
+        fake_settings = create_mock_settings("medium")
+        monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+        prefixed_models = [
+            "openai/gpt-5",
+            "openai/gpt-5.1-codex",
+            "openai/gpt-5.1-codex-max",
+            "openai/gpt-5.4-mini",
+        ]
+
+        for model in prefixed_models:
+            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+                mock_completion.return_value = create_mock_acompletion_response()
+
+                handler = LiteLLMAIHandler()
+                await handler.chat_completion(
+                    model=model,
+                    system="test system",
+                    user="test user"
+                )
+
+                call_kwargs = mock_completion.call_args[1]
+                # GPT-5 path must trigger and drop temperature in favor of reasoning_effort
+                assert call_kwargs["reasoning_effort"] == "medium", f"failed for {model}"
+                assert "reasoning_effort" in call_kwargs["allowed_openai_params"], f"failed for {model}"
+                assert "temperature" not in call_kwargs, f"temperature leaked for {model}"
+                # Model name passed to litellm must keep the openai/ prefix exactly once
+                assert call_kwargs["model"] == model, f"model double-prefixed: {call_kwargs['model']}"
+
+    @pytest.mark.asyncio
+    async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, mock_logger):
+        """Prefixed _thinking models must have the suffix removed without double-prefixing."""
+        fake_settings = create_mock_settings("low")
+        monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+            mock_completion.return_value = create_mock_acompletion_response()
+
+            handler = LiteLLMAIHandler()
+            await handler.chat_completion(
+                model="openai/gpt-5_thinking",
+                system="test system",
+                user="test user"
+            )
+
+            call_kwargs = mock_completion.call_args[1]
+            assert call_kwargs["model"] == "openai/gpt-5"
+            assert call_kwargs["reasoning_effort"] == "low"

From ba459fb3dc9ad643ef1c274375a4a87fccb0b587 Mon Sep 17 00:00:00 2001
From: Sergey Petrov <sergey.petrov@travelata.ru>
Date: Mon, 18 May 2026 14:44:07 +0300
Subject: [PATCH 2/4] fix(litellm): preserve explicit provider prefix and
 dedupe stacked prefixes

Address two routing issues raised by the Qodo review on the initial fix:

1. Stacked prefixes (Azure mode + provider-qualified config). With
   self.azure prepending "azure/" to a model that already had an
   "openai/" or "azure/" prefix in config, the single removeprefix()
   call left a residual provider segment, GPT-5 detection failed, and
   temperature was still sent (the original bug this PR fixes).
2. Explicit "azure/" rewritten to "openai/" when self.azure was false.
   The previous rebuild used "azure/ if self.azure else openai/",
   which ignored an explicit "azure/" the user had written in config
   and silently rerouted the request to OpenAI.

Strip provider prefixes in a loop so any number of stacked segments is
removed, and choose the rebuilt prefix by priority: Azure mode >
explicit prefix in user config > "openai/" default. Capture the user's
original model string before the azure auto-prepend so the explicit
prefix is still visible at rebuild time.

Tests added for:
- explicit "azure/" preserved when self.azure is false
- Azure mode with bare / openai/- / azure/- prefixed configs all
  producing exactly one "azure/" prefix
---
 .../algo/ai_handlers/litellm_ai_handler.py    | 30 +++++++---
 .../unittest/test_litellm_reasoning_effort.py | 57 +++++++++++++++++++
 2 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 7da1d2c996..386742f6cf 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -408,6 +408,10 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             try:
                 resp, finish_reason = None, None
                 deployment_id = self.deployment_id
+                # Capture the original model string so an explicit provider prefix in the
+                # user config (e.g. "azure/gpt-5...") can be preserved when the GPT-5 branch
+                # rebuilds the routed model name below.
+                user_model = model
                 if self.azure:
                     model = 'azure/' + model
                 if 'claude' in model and not system:
@@ -431,11 +435,14 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                                               {"type": "image_url", "image_url": {"url": img_path}}]
 
                 thinking_kwargs_gpt5 = None
-                # Detect GPT-5 family regardless of provider prefix users may put in config
-                # (e.g. "openai/gpt-5.1-codex-max"). Without this, prefixed model names skipped
-                # the reasoning_effort path and litellm rejected the request with
-                # UnsupportedParamsError for temperature=0.2.
-                model_base = model.removeprefix('openai/').removeprefix('azure/')
+                # Detect GPT-5 family regardless of provider prefix(es) on the model name.
+                # Users sometimes put a provider prefix in config (e.g. "openai/gpt-5.1-codex-max"),
+                # and Azure mode auto-prepends "azure/", which together can produce stacked prefixes
+                # like "azure/openai/gpt-5...". Without normalization the GPT-5 path is skipped and
+                # litellm rejects the request with UnsupportedParamsError for temperature=0.2.
+                model_base = model
+                while model_base.startswith(('openai/', 'azure/')):
+                    model_base = model_base.removeprefix('openai/').removeprefix('azure/')
                 if model_base.startswith('gpt-5'):
                     # Use configured reasoning_effort or default to MEDIUM
                     config_effort = get_settings().config.reasoning_effort
@@ -455,8 +462,17 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                         "allowed_openai_params": ["reasoning_effort"],
                     }
                     get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model")
-                    # Preserve azure/ routing if it was applied above; otherwise route via openai/
-                    provider_prefix = 'azure/' if self.azure else 'openai/'
+                    # Routing priority: Azure mode > explicit provider prefix in user config > openai/
+                    # default. This preserves an explicit "azure/" the user wrote in config even when
+                    # self.azure is false, and avoids stacking when self.azure already added "azure/".
+                    if self.azure:
+                        provider_prefix = 'azure/'
+                    elif user_model.startswith('azure/'):
+                        provider_prefix = 'azure/'
+                    elif user_model.startswith('openai/'):
+                        provider_prefix = 'openai/'
+                    else:
+                        provider_prefix = 'openai/'
                     model = provider_prefix + model_base.replace('_thinking', '')  # remove _thinking suffix
 
 
diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py
index 9ca7c0b90f..acf4e3128d 100644
--- a/tests/unittest/test_litellm_reasoning_effort.py
+++ b/tests/unittest/test_litellm_reasoning_effort.py
@@ -741,3 +741,60 @@ async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch,
             call_kwargs = mock_completion.call_args[1]
             assert call_kwargs["model"] == "openai/gpt-5"
             assert call_kwargs["reasoning_effort"] == "low"
+
+    @pytest.mark.asyncio
+    async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypatch, mock_logger):
+        """Explicit `azure/` prefix in user config must be preserved (not silently rewritten to openai/)."""
+        fake_settings = create_mock_settings("medium")
+        monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+            mock_completion.return_value = create_mock_acompletion_response()
+
+            handler = LiteLLMAIHandler()
+            # self.azure is False by default (no Azure AD creds in test env)
+            await handler.chat_completion(
+                model="azure/gpt-5.1-codex-max",
+                system="test system",
+                user="test user"
+            )
+
+            call_kwargs = mock_completion.call_args[1]
+            assert call_kwargs["reasoning_effort"] == "medium"
+            assert "temperature" not in call_kwargs
+            # Provider prefix from user config must be preserved verbatim
+            assert call_kwargs["model"] == "azure/gpt-5.1-codex-max"
+
+    @pytest.mark.asyncio
+    async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, mock_logger):
+        """Azure mode must produce exactly one `azure/` prefix, even if user config also has a prefix."""
+        fake_settings = create_mock_settings("medium")
+        monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+        # Cases: bare name, openai/-prefixed config, azure/-prefixed config — all in azure mode
+        cases = [
+            ("gpt-5.1-codex", "azure/gpt-5.1-codex"),
+            ("openai/gpt-5.1-codex", "azure/gpt-5.1-codex"),
+            ("azure/gpt-5.1-codex", "azure/gpt-5.1-codex"),
+        ]
+
+        for input_model, expected in cases:
+            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+                mock_completion.return_value = create_mock_acompletion_response()
+
+                handler = LiteLLMAIHandler()
+                handler.azure = True  # simulate Azure-mode handler
+                await handler.chat_completion(
+                    model=input_model,
+                    system="test system",
+                    user="test user"
+                )
+
+                call_kwargs = mock_completion.call_args[1]
+                # GPT-5 path must trigger
+                assert call_kwargs["reasoning_effort"] == "medium", f"failed for {input_model}"
+                assert "temperature" not in call_kwargs, f"temperature leaked for {input_model}"
+                # Exactly one azure/ prefix, no stacked/duplicated provider segments
+                assert call_kwargs["model"] == expected, (
+                    f"wrong routing for {input_model}: got {call_kwargs['model']}, expected {expected}"
+                )

From 66026ffc372b26e0ff853a991a32154b8ffdab7f Mon Sep 17 00:00:00 2001
From: Sergey Petrov <sergey.petrov@travelata.ru>
Date: Mon, 18 May 2026 14:44:14 +0300
Subject: [PATCH 3/4] chore(tests): wrap long with-patch lines in GPT-5 prefix
 tests

Four `with patch(...)` statements in the GPT-5 provider-prefix tests
exceeded the repo's 120-character line limit (ruff line-length=120 in
pyproject.toml). Wrap them onto multiple lines so the test file does
not regress lint compliance for the newly added cases.
---
 .../unittest/test_litellm_reasoning_effort.py | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py
index acf4e3128d..2175086dae 100644
--- a/tests/unittest/test_litellm_reasoning_effort.py
+++ b/tests/unittest/test_litellm_reasoning_effort.py
@@ -704,7 +704,10 @@ async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypat
         ]
 
         for model in prefixed_models:
-            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+            with patch(
+                'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
+                new_callable=AsyncMock,
+            ) as mock_completion:
                 mock_completion.return_value = create_mock_acompletion_response()
 
                 handler = LiteLLMAIHandler()
@@ -728,7 +731,10 @@ async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch,
         fake_settings = create_mock_settings("low")
         monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
 
-        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+        with patch(
+            'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
+            new_callable=AsyncMock,
+        ) as mock_completion:
             mock_completion.return_value = create_mock_acompletion_response()
 
             handler = LiteLLMAIHandler()
@@ -748,7 +754,10 @@ async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypat
         fake_settings = create_mock_settings("medium")
         monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
 
-        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+        with patch(
+            'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
+            new_callable=AsyncMock,
+        ) as mock_completion:
             mock_completion.return_value = create_mock_acompletion_response()
 
             handler = LiteLLMAIHandler()
@@ -779,7 +788,10 @@ async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, moc
         ]
 
         for input_model, expected in cases:
-            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+            with patch(
+                'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
+                new_callable=AsyncMock,
+            ) as mock_completion:
                 mock_completion.return_value = create_mock_acompletion_response()
 
                 handler = LiteLLMAIHandler()

From 79513bafbacd08e58fa685e09861fb0f0c795562 Mon Sep 17 00:00:00 2001
From: Sergey Petrov <sergey.petrov@travelata.ru>
Date: Mon, 18 May 2026 15:00:37 +0300
Subject: [PATCH 4/4] test(litellm): explicitly isolate GPT-5 prefix tests from
 runner env

Address Qodo review feedback: the added Group 8 tests instantiate
LiteLLMAIHandler() directly, and its __init__ branches on environment
variables such as AWS_USE_IMDS, OPENAI_API_KEY, and the AWS_* set. With
those vars potentially leaking from the runner environment, the tests
could behave differently across machines or CI runners (e.g. attempt
boto3 import when AWS_USE_IMDS is set).

Explicitly delete the relevant env vars via monkeypatch.delenv() at the
start of each new test, so the constructor takes a deterministic path
regardless of where the suite runs.
---
 tests/unittest/test_litellm_reasoning_effort.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py
index 2175086dae..3c4905ef01 100644
--- a/tests/unittest/test_litellm_reasoning_effort.py
+++ b/tests/unittest/test_litellm_reasoning_effort.py
@@ -695,6 +695,10 @@ async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypat
         """
         fake_settings = create_mock_settings("medium")
         monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+        # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
+        for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
+                     "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
+            monkeypatch.delenv(_var, raising=False)
 
         prefixed_models = [
             "openai/gpt-5",
@@ -730,6 +734,10 @@ async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch,
         """Prefixed _thinking models must have the suffix removed without double-prefixing."""
         fake_settings = create_mock_settings("low")
         monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+        # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
+        for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
+                     "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
+            monkeypatch.delenv(_var, raising=False)
 
         with patch(
             'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
@@ -753,6 +761,10 @@ async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypat
         """Explicit `azure/` prefix in user config must be preserved (not silently rewritten to openai/)."""
         fake_settings = create_mock_settings("medium")
         monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+        # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
+        for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
+                     "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
+            monkeypatch.delenv(_var, raising=False)
 
         with patch(
             'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
@@ -779,6 +791,10 @@ async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, moc
         """Azure mode must produce exactly one `azure/` prefix, even if user config also has a prefix."""
         fake_settings = create_mock_settings("medium")
         monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+        # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
+        for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
+                     "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
+            monkeypatch.delenv(_var, raising=False)
 
         # Cases: bare name, openai/-prefixed config, azure/-prefixed config — all in azure mode
         cases = [