The-PR-Agent · sizickp · May 18, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -431,7 +431,12 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                                               {"type": "image_url", "image_url": {"url": img_path}}]
 
                 thinking_kwargs_gpt5 = None
-                if model.startswith('gpt-5'):
+                # Detect the GPT-5 family regardless of any provider prefix in the configured
+                # model name (e.g. "openai/gpt-5.1-codex-max"). Previously, prefixed names skipped
+                # the reasoning_effort path, so litellm rejected the request with
+                # UnsupportedParamsError for temperature=0.2.
+                model_base = model.removeprefix('openai/').removeprefix('azure/')
+                if model_base.startswith('gpt-5'):
                     # Use configured reasoning_effort or default to MEDIUM
                     config_effort = get_settings().config.reasoning_effort
                     try:
@@ -450,7 +455,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                         "allowed_openai_params": ["reasoning_effort"],
                     }
                     get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model")
-                    model = 'openai/'+model.replace('_thinking', '')  # remove _thinking suffix
+                    # Re-apply the provider prefix stripped above: azure/ when self.azure is set, otherwise openai/
+                    provider_prefix = 'azure/' if self.azure else 'openai/'
+                    model = provider_prefix + model_base.replace('_thinking', '')  # remove _thinking suffix
 
 
                 # Currently, some models do not support a separate system and user prompts

diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py
@@ -682,3 +682,62 @@ async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger):
             # Should have reasoning_effort
             call_kwargs = mock_completion.call_args[1]
             assert call_kwargs["reasoning_effort"] == "medium"
+
+    # ========== Group 8: Provider Prefix Handling ==========
+
+    @pytest.mark.asyncio
+    async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypatch, mock_logger):
+        """Regression: model="openai/gpt-5*" must enter the GPT-5 reasoning_effort path.
+
+        Before the fix, startswith('gpt-5') was False for prefixed names, so the handler
+        sent temperature=0.2 to litellm and the request failed with UnsupportedParamsError
+        for gpt-5 codex models.
+        """
+        fake_settings = create_mock_settings("medium")
+        monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+        prefixed_models = [
+            "openai/gpt-5",
+            "openai/gpt-5.1-codex",
+            "openai/gpt-5.1-codex-max",
+            "openai/gpt-5.4-mini",
+        ]
+
+        for model in prefixed_models:
+            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+                mock_completion.return_value = create_mock_acompletion_response()
+
+                handler = LiteLLMAIHandler()
+                await handler.chat_completion(
+                    model=model,
+                    system="test system",
+                    user="test user"
+                )
+
+                call_kwargs = mock_completion.call_args[1]
+                # GPT-5 path must trigger and drop temperature in favor of reasoning_effort
+                assert call_kwargs["reasoning_effort"] == "medium", f"failed for {model}"
+                assert "reasoning_effort" in call_kwargs["allowed_openai_params"], f"failed for {model}"
+                assert "temperature" not in call_kwargs, f"temperature leaked for {model}"
+                # Model name passed to litellm must keep the openai/ prefix exactly once
+                assert call_kwargs["model"] == model, f"model double-prefixed: {call_kwargs['model']}"
+
+    @pytest.mark.asyncio
+    async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, mock_logger):
+        """Prefixed _thinking models must have the suffix removed without double-prefixing."""
+        fake_settings = create_mock_settings("low")
+        monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
+            mock_completion.return_value = create_mock_acompletion_response()
+
+            handler = LiteLLMAIHandler()
+            await handler.chat_completion(
+                model="openai/gpt-5_thinking",
+                system="test system",
+                user="test user"
+            )
+
+            call_kwargs = mock_completion.call_args[1]
+            assert call_kwargs["model"] == "openai/gpt-5"
+            assert call_kwargs["reasoning_effort"] == "low"