diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index a6e79d7a07..386742f6cf 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -408,6 +408,10 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: try: resp, finish_reason = None, None deployment_id = self.deployment_id + # Capture the original model string so an explicit provider prefix in the + # user config (e.g. "azure/gpt-5...") can be preserved when the GPT-5 branch + # rebuilds the routed model name below. + user_model = model if self.azure: model = 'azure/' + model if 'claude' in model and not system: @@ -431,7 +435,15 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: {"type": "image_url", "image_url": {"url": img_path}}] thinking_kwargs_gpt5 = None - if model.startswith('gpt-5'): + # Detect GPT-5 family regardless of provider prefix(es) on the model name. + # Users sometimes put a provider prefix in config (e.g. "openai/gpt-5.1-codex-max"), + # and Azure mode auto-prepends "azure/", which together can produce stacked prefixes + # like "azure/openai/gpt-5...". Without normalization the GPT-5 path is skipped and + # litellm rejects the request with UnsupportedParamsError for temperature=0.2. + model_base = model + while model_base.startswith(('openai/', 'azure/')): + model_base = model_base.removeprefix('openai/').removeprefix('azure/') + if model_base.startswith('gpt-5'): # Use configured reasoning_effort or default to MEDIUM config_effort = get_settings().config.reasoning_effort try: @@ -450,7 +462,18 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: "allowed_openai_params": ["reasoning_effort"], } get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model") - model = 'openai/'+model.replace('_thinking', '') # remove _thinking suffix + # Routing priority: Azure mode > explicit provider prefix in user config > openai/ + # default. This preserves an explicit "azure/" the user wrote in config even when + # self.azure is false, and avoids stacking when self.azure already added "azure/". + if self.azure: + provider_prefix = 'azure/' + elif user_model.startswith('azure/'): + provider_prefix = 'azure/' + elif user_model.startswith('openai/'): + provider_prefix = 'openai/' + else: + provider_prefix = 'openai/' + model = provider_prefix + model_base.replace('_thinking', '') # remove _thinking suffix # Currently, some models do not support a separate system and user prompts diff --git a/tests/unittest/test_litellm_reasoning_effort.py b/tests/unittest/test_litellm_reasoning_effort.py index 298fce640c..3c4905ef01 100644 --- a/tests/unittest/test_litellm_reasoning_effort.py +++ b/tests/unittest/test_litellm_reasoning_effort.py @@ -682,3 +682,147 @@ async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger): # Should have reasoning_effort call_kwargs = mock_completion.call_args[1] assert call_kwargs["reasoning_effort"] == "medium" + + # ========== Group 8: Provider Prefix Handling ========== + + @pytest.mark.asyncio + async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypatch, mock_logger): + """Regression: model="openai/gpt-5*" must enter the GPT-5 reasoning_effort path. + + Before the fix, startswith('gpt-5') was False for prefixed names, so the handler + sent temperature=0.2 to litellm and the request failed with UnsupportedParamsError + for gpt-5 codex models. + """ + fake_settings = create_mock_settings("medium") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) + + prefixed_models = [ + "openai/gpt-5", + "openai/gpt-5.1-codex", + "openai/gpt-5.1-codex-max", + "openai/gpt-5.4-mini", + ] + + for model in prefixed_models: + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler.chat_completion( + model=model, + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + # GPT-5 path must trigger and drop temperature in favor of reasoning_effort + assert call_kwargs["reasoning_effort"] == "medium", f"failed for {model}" + assert "reasoning_effort" in call_kwargs["allowed_openai_params"], f"failed for {model}" + assert "temperature" not in call_kwargs, f"temperature leaked for {model}" + # Model name passed to litellm must keep the openai/ prefix exactly once + assert call_kwargs["model"] == model, f"model double-prefixed: {call_kwargs['model']}" + + @pytest.mark.asyncio + async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, mock_logger): + """Prefixed _thinking models must have the suffix removed without double-prefixing.""" + fake_settings = create_mock_settings("low") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) + + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler.chat_completion( + model="openai/gpt-5_thinking", + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["model"] == "openai/gpt-5" + assert call_kwargs["reasoning_effort"] == "low" + + @pytest.mark.asyncio + async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypatch, mock_logger): + """Explicit `azure/` prefix in user config must be preserved (not silently rewritten to openai/).""" + fake_settings = create_mock_settings("medium") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) + + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + # self.azure is False by default (no Azure AD creds in test env) + await handler.chat_completion( + model="azure/gpt-5.1-codex-max", + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["reasoning_effort"] == "medium" + assert "temperature" not in call_kwargs + # Provider prefix from user config must be preserved verbatim + assert call_kwargs["model"] == "azure/gpt-5.1-codex-max" + + @pytest.mark.asyncio + async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, mock_logger): + """Azure mode must produce exactly one `azure/` prefix, even if user config also has a prefix.""" + fake_settings = create_mock_settings("medium") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + # Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars. + for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"): + monkeypatch.delenv(_var, raising=False) + + # Cases: bare name, openai/-prefixed config, azure/-prefixed config — all in azure mode + cases = [ + ("gpt-5.1-codex", "azure/gpt-5.1-codex"), + ("openai/gpt-5.1-codex", "azure/gpt-5.1-codex"), + ("azure/gpt-5.1-codex", "azure/gpt-5.1-codex"), + ] + + for input_model, expected in cases: + with patch( + 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + handler.azure = True # simulate Azure-mode handler + await handler.chat_completion( + model=input_model, + system="test system", + user="test user" + ) + + call_kwargs = mock_completion.call_args[1] + # GPT-5 path must trigger + assert call_kwargs["reasoning_effort"] == "medium", f"failed for {input_model}" + assert "temperature" not in call_kwargs, f"temperature leaked for {input_model}" + # Exactly one azure/ prefix, no stacked/duplicated provider segments + assert call_kwargs["model"] == expected, ( + f"wrong routing for {input_model}: got {call_kwargs['model']}, expected {expected}" + )