fix(providers): parse Ollama reasoning fields

2026-06-09 16:02:14 +08:00 · 2026-06-08 10:08:32 +09:00
parent 6001156a6c
commit 7503c1c031
2 changed files with 135 additions and 0 deletions
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -572,6 +572,7 @@ impl StreamState {
                .delta
                .reasoning_content
                .filter(|value| !value.is_empty())
                .or(choice.delta.reasoning.filter(|value| !value.is_empty()))
                .or(choice
                    .delta
                    .thinking
@@ -827,6 +828,8 @@ struct ChatMessage {
    #[serde(default)]
    reasoning_content: Option<String>,
    #[serde(default)]
    reasoning: Option<String>,
    #[serde(default)]
    tool_calls: Vec<ResponseToolCall>,
 }
@@ -901,6 +904,8 @@ struct ChunkDelta {
    #[serde(default)]
    reasoning_content: Option<String>,
    #[serde(default)]
    reasoning: Option<String>,
    #[serde(default)]
    thinking: Option<ThinkingDelta>,
    #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
    tool_calls: Vec<DeltaToolCall>,
@@ -1510,6 +1515,7 @@ fn normalize_response(
        .message
        .reasoning_content
        .filter(|value| !value.is_empty())
        .or(choice.message.reasoning.filter(|value| !value.is_empty()))
    {
        content.push(OutputContentBlock::Thinking {
            thinking,
@@ -1992,6 +1998,7 @@ mod tests {
                    role: "assistant".to_string(),
                    content: Some("final answer".to_string()),
                    reasoning_content: Some("hidden thought".to_string()),
                    reasoning: None,
                    tool_calls: Vec::new(),
                },
                finish_reason: Some("stop".to_string()),
@@ -2029,6 +2036,7 @@ mod tests {
                    delta: super::ChunkDelta {
                        content: None,
                        reasoning_content: Some("think".to_string()),
                        reasoning: None,
                        thinking: None,
                        tool_calls: Vec::new(),
                    },
@@ -2046,6 +2054,7 @@ mod tests {
                        delta: super::ChunkDelta {
                            content: Some(" answer".to_string()),
                            reasoning_content: None,
                            reasoning: None,
                            thinking: None,
                            tool_calls: Vec::new(),
                        },
--- a/rust/crates/api/tests/openai_compat_integration.rs
+++ b/rust/crates/api/tests/openai_compat_integration.rs
@@ -166,6 +166,55 @@ async fn send_message_preserves_deepseek_reasoning_content_before_text() {
    assert_eq!(body["thinking"], json!({"type": "enabled"}));
 }
 // Non-streaming case: a completion whose message carries a `reasoning` string
 // next to `content` must come back as a Thinking block followed by a Text block.
 #[tokio::test]
 async fn send_message_preserves_ollama_reasoning_before_text() {
    // Canned chat-completion payload served by the mock server.
    let response_json = concat!(
        "{",
        "\"id\":\"chatcmpl_ollama_reasoning\",",
        "\"model\":\"qwen3:latest\",",
        "\"choices\":[{",
        "\"message\":{\"role\":\"assistant\",\"reasoning\":\"Think locally\",\"content\":\"Answer locally\",\"tool_calls\":[]},",
        "\"finish_reason\":\"stop\"",
        "}],",
        "\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":5}",
        "}"
    );
    // Shared buffer the mock server records incoming requests into.
    let captured_requests = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
    let server = spawn_server(
        captured_requests.clone(),
        vec![http_response("200 OK", "application/json", response_json)],
    )
    .await;

    let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai())
        .with_base_url(server.base_url());
    let outgoing = MessageRequest {
        model: "openai/qwen3:latest".to_string(),
        ..sample_request(false)
    };
    let response = client
        .send_message(&outgoing)
        .await
        .expect("request should succeed");

    // The reasoning text must precede the visible answer.
    let expected_blocks = vec![
        OutputContentBlock::Thinking {
            thinking: "Think locally".to_string(),
            signature: None,
        },
        OutputContentBlock::Text {
            text: "Answer locally".to_string(),
        },
    ];
    assert_eq!(response.content, expected_blocks);

    // The request on the wire names the model without the `openai/` prefix.
    let captured = captured_requests.lock().await;
    let sent = captured.first().expect("server should capture request");
    let sent_body: serde_json::Value = serde_json::from_str(&sent.body).expect("json body");
    assert_eq!(sent_body["model"], json!("qwen3:latest"));
 }
 #[tokio::test]
 async fn local_openai_gateway_strips_routing_prefix_and_preserves_extra_body_params() {
    let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
@@ -389,6 +438,83 @@ async fn stream_message_normalizes_text_and_multiple_tool_calls() {
    assert!(request.body.contains("\"stream\":true"));
 }
 // Streaming case: a delta carrying only `reasoning` followed by a delta carrying
 // `content` must surface as a Thinking block (index 0) and then a Text block (index 1).
 #[tokio::test]
 async fn stream_message_preserves_ollama_reasoning_before_text() {
    // SSE transcript: one reasoning chunk, one content chunk with a finish reason,
    // then the `[DONE]` terminator.
    let sse_transcript = concat!(
        "data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"model\":\"qwen3:latest\",\"choices\":[{\"delta\":{\"reasoning\":\"Think\"}}]}\n\n",
        "data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"choices\":[{\"delta\":{\"content\":\" answer\"},\"finish_reason\":\"stop\"}]}\n\n",
        "data: [DONE]\n\n"
    );
    // Shared buffer the mock server records incoming requests into.
    let captured_requests = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
    let server = spawn_server(
        captured_requests.clone(),
        vec![http_response_with_headers(
            "200 OK",
            "text/event-stream",
            sse_transcript,
            &[("x-request-id", "req_ollama_reasoning_stream")],
        )],
    )
    .await;

    let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai())
        .with_base_url(server.base_url());
    let outgoing = MessageRequest {
        model: "openai/qwen3:latest".to_string(),
        ..sample_request(false)
    };
    let mut stream = client
        .stream_message(&outgoing)
        .await
        .expect("stream should start");

    // The request id comes from the response header set on the mock response.
    assert_eq!(stream.request_id(), Some("req_ollama_reasoning_stream"));

    // Drain the stream until it reports no further events.
    let mut events = Vec::new();
    while let Some(next) = stream.next_event().await.expect("event should parse") {
        events.push(next);
    }

    // Expected prefix: message start, a complete thinking block, then the text block.
    assert!(matches!(events[0], StreamEvent::MessageStart(_)));
    assert!(matches!(
        events[1],
        StreamEvent::ContentBlockStart(ContentBlockStartEvent {
            index: 0,
            content_block: OutputContentBlock::Thinking { .. },
        })
    ));
    assert!(matches!(
        events[2],
        StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
            index: 0,
            delta: ContentBlockDelta::ThinkingDelta { .. },
        })
    ));
    // The thinking block is closed before any text block is opened.
    assert!(matches!(
        events[3],
        StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 })
    ));
    assert!(matches!(
        events[4],
        StreamEvent::ContentBlockStart(ContentBlockStartEvent {
            index: 1,
            content_block: OutputContentBlock::Text { .. },
        })
    ));
    assert!(matches!(
        events[5],
        StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
            index: 1,
            delta: ContentBlockDelta::TextDelta { .. },
        })
    ));

    // The request on the wire names the model without the `openai/` prefix and asks for streaming.
    let captured = captured_requests.lock().await;
    let sent = captured.first().expect("captured request");
    let sent_body: serde_json::Value = serde_json::from_str(&sent.body).expect("json body");
    assert_eq!(sent_body["model"], json!("qwen3:latest"));
    assert_eq!(sent_body["stream"], json!(true));
 }
 #[allow(clippy::await_holding_lock)]
 #[tokio::test]
 async fn stream_message_retries_retryable_sse_handshake_failures() {