Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,12 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
{"type": "image_url", "image_url": {"url": img_path}}]

thinking_kwargs_gpt5 = None
if model.startswith('gpt-5'):
# Detect GPT-5 family regardless of provider prefix users may put in config
# (e.g. "openai/gpt-5.1-codex-max"). Without this, prefixed model names skipped
# the reasoning_effort path and litellm rejected the request with
# UnsupportedParamsError for temperature=0.2.
model_base = model.removeprefix('openai/').removeprefix('azure/')
if model_base.startswith('gpt-5'):
# Use configured reasoning_effort or default to MEDIUM

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Azure+prefix skips gpt-5 🐞 Bug ≡ Correctness

In Azure mode, chat_completion() prepends azure/ before computing model_base, and the new
removeprefix() chain only removes a single leading provider prefix. If the configured model is
already provider-qualified (e.g. openai/gpt-5* or azure/gpt-5*), model_base can still start
with openai/ or azure/, so the GPT-5 branch is skipped and temperature is sent (triggering the
same LiteLLM UnsupportedParamsError this PR aims to fix).
Agent Prompt
### Issue description
In `LiteLLMAIHandler.chat_completion()`, GPT-5 detection relies on `model_base` computed via `removeprefix('openai/').removeprefix('azure/')`. When `self.azure` is enabled, the function prepends `azure/` to the model first; if the incoming model already has a provider prefix, the resulting string can contain multiple provider segments (e.g. `azure/openai/gpt-5...` or `azure/azure/gpt-5...`). The current normalization removes at most one leading prefix, so `model_base` may still begin with a provider segment and fail `startswith('gpt-5')`, causing `temperature` to be passed to GPT-5.

### Issue Context
Models come directly from user configuration/fallback lists and can be provider-qualified strings.

### Fix Focus Areas
- pr_agent/algo/ai_handlers/litellm_ai_handler.py[409-413]
- pr_agent/algo/ai_handlers/litellm_ai_handler.py[433-461]
- pr_agent/algo/pr_processing.py[341-354]

### Suggested fix
Refactor provider handling so you do not build composite provider prefixes:
1) Determine/normalize the provider prefix separately from the model name (don’t mutate `model` with `'azure/' + model` before parsing).
2) Strip provider prefixes in a loop (or parse once) until the remaining model name is the real base (`gpt-5...`).
3) Then rebuild the final routed model string once (provider + base model), ensuring exactly one provider prefix.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

config_effort = get_settings().config.reasoning_effort
try:
Expand All @@ -450,7 +455,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
"allowed_openai_params": ["reasoning_effort"],
}
get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model")
model = 'openai/'+model.replace('_thinking', '') # remove _thinking suffix
# Preserve azure/ routing if it was applied above; otherwise route via openai/
provider_prefix = 'azure/' if self.azure else 'openai/'
model = provider_prefix + model_base.replace('_thinking', '') # remove _thinking suffix
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
Outdated


# Currently, some models do not support a separate system and user prompts
Expand Down
59 changes: 59 additions & 0 deletions tests/unittest/test_litellm_reasoning_effort.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,62 @@ async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger):
# Should have reasoning_effort
call_kwargs = mock_completion.call_args[1]
assert call_kwargs["reasoning_effort"] == "medium"

# ========== Group 8: Provider Prefix Handling ==========

@pytest.mark.asyncio
async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypatch, mock_logger):
"""Regression: model="openai/gpt-5*" must enter the GPT-5 reasoning_effort path.

Before the fix, startswith('gpt-5') was False for prefixed names, so the handler
sent temperature=0.2 to litellm and the request failed with UnsupportedParamsError
for gpt-5 codex models.
"""
fake_settings = create_mock_settings("medium")
monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)

prefixed_models = [
"openai/gpt-5",
"openai/gpt-5.1-codex",
"openai/gpt-5.1-codex-max",
"openai/gpt-5.4-mini",
]

for model in prefixed_models:
with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
Outdated
mock_completion.return_value = create_mock_acompletion_response()

handler = LiteLLMAIHandler()
await handler.chat_completion(
model=model,
system="test system",
user="test user"
)

call_kwargs = mock_completion.call_args[1]
# GPT-5 path must trigger and drop temperature in favor of reasoning_effort
assert call_kwargs["reasoning_effort"] == "medium", f"failed for {model}"
assert "reasoning_effort" in call_kwargs["allowed_openai_params"], f"failed for {model}"
assert "temperature" not in call_kwargs, f"temperature leaked for {model}"
# Model name passed to litellm must keep the openai/ prefix exactly once
assert call_kwargs["model"] == model, f"model double-prefixed: {call_kwargs['model']}"

@pytest.mark.asyncio
async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, mock_logger):
"""Prefixed _thinking models must have the suffix removed without double-prefixing."""
fake_settings = create_mock_settings("low")
monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)

with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as mock_completion:
mock_completion.return_value = create_mock_acompletion_response()

handler = LiteLLMAIHandler()
await handler.chat_completion(
model="openai/gpt-5_thinking",
system="test system",
user="test user"
)

call_kwargs = mock_completion.call_args[1]
assert call_kwargs["model"] == "openai/gpt-5"
assert call_kwargs["reasoning_effort"] == "low"
Loading