fix: emit claude message_delta for usage-only final stream chunk

This commit is contained in:
Seefs
2026-04-04 20:21:13 +08:00
parent 495e4f5e17
commit 82c2008d2c
4 changed files with 98 additions and 21 deletions
@@ -85,7 +85,7 @@ func TestBuildMessageDeltaPatchUsage(t *testing.T) {
require.EqualValues(t, 50, usage.CacheCreationInputTokens) require.EqualValues(t, 50, usage.CacheCreationInputTokens)
require.EqualValues(t, 53, usage.OutputTokens) require.EqualValues(t, 53, usage.OutputTokens)
require.NotNil(t, usage.CacheCreation) require.NotNil(t, usage.CacheCreation)
require.EqualValues(t, 10, usage.CacheCreation.Ephemeral5mInputTokens) require.EqualValues(t, 30, usage.CacheCreation.Ephemeral5mInputTokens)
require.EqualValues(t, 20, usage.CacheCreation.Ephemeral1hInputTokens) require.EqualValues(t, 20, usage.CacheCreation.Ephemeral1hInputTokens)
}) })
@@ -108,4 +108,22 @@ func TestBuildMessageDeltaPatchUsage(t *testing.T) {
require.EqualValues(t, 7, usage.CacheReadInputTokens) require.EqualValues(t, 7, usage.CacheReadInputTokens)
require.EqualValues(t, 6, usage.CacheCreationInputTokens) require.EqualValues(t, 6, usage.CacheCreationInputTokens)
}) })
t.Run("default aggregate cache creation to 5m when split missing", func(t *testing.T) {
claudeResponse := &dto.ClaudeResponse{Usage: &dto.ClaudeUsage{
OutputTokens: 53,
CacheCreationInputTokens: 50,
}}
claudeInfo := &ClaudeResponseInfo{Usage: &dto.Usage{
PromptTokensDetails: dto.InputTokenDetails{
CachedCreationTokens: 50,
},
}}
usage := buildMessageDeltaPatchUsage(claudeResponse, claudeInfo)
require.NotNil(t, usage)
require.NotNil(t, usage.CacheCreation)
require.EqualValues(t, 50, usage.CacheCreation.Ephemeral5mInputTokens)
require.EqualValues(t, 0, usage.CacheCreation.Ephemeral1hInputTokens)
})
} }
+24 -4
View File
@@ -648,6 +648,11 @@ func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage {
return dto.Usage{} return dto.Usage{}
} }
clone := *usage clone := *usage
clone.ClaudeCacheCreation5mTokens, clone.ClaudeCacheCreation1hTokens = service.NormalizeCacheCreationSplit(
usage.PromptTokensDetails.CachedCreationTokens,
usage.ClaudeCacheCreation5mTokens,
usage.ClaudeCacheCreation1hTokens,
)
cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage) cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage)
totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens
clone.PromptTokens = totalInputTokens clone.PromptTokens = totalInputTokens
@@ -677,11 +682,26 @@ func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo
if usage.CacheCreationInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens > 0 { if usage.CacheCreationInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens > 0 {
usage.CacheCreationInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens usage.CacheCreationInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens
} }
if usage.CacheCreation == nil && (claudeInfo.Usage.ClaudeCacheCreation5mTokens > 0 || claudeInfo.Usage.ClaudeCacheCreation1hTokens > 0) { cacheCreation5m := 0
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{ cacheCreation1h := 0
Ephemeral5mInputTokens: claudeInfo.Usage.ClaudeCacheCreation5mTokens, if usage.CacheCreation != nil {
Ephemeral1hInputTokens: claudeInfo.Usage.ClaudeCacheCreation1hTokens, cacheCreation5m = usage.CacheCreation.Ephemeral5mInputTokens
cacheCreation1h = usage.CacheCreation.Ephemeral1hInputTokens
} else {
cacheCreation5m = claudeInfo.Usage.ClaudeCacheCreation5mTokens
cacheCreation1h = claudeInfo.Usage.ClaudeCacheCreation1hTokens
} }
cacheCreation5m, cacheCreation1h = service.NormalizeCacheCreationSplit(
usage.CacheCreationInputTokens,
cacheCreation5m,
cacheCreation1h,
)
if usage.CacheCreation == nil && (cacheCreation5m > 0 || cacheCreation1h > 0) {
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{}
}
if usage.CacheCreation != nil {
usage.CacheCreation.Ephemeral5mInputTokens = cacheCreation5m
usage.CacheCreation.Ephemeral1hInputTokens = cacheCreation1h
} }
return usage return usage
} }
+17
View File
@@ -258,6 +258,23 @@ func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *
} }
} }
func TestBuildOpenAIStyleUsageFromClaudeUsageDefaultsAggregateCacheCreationTo5m(t *testing.T) {
	// When upstream reports only the aggregate cache-creation token count and
	// no 5m/1h split, the full aggregate must land in the 5m bucket and the
	// 1h bucket must stay zero.
	claudeUsage := &dto.Usage{
		PromptTokens:     100,
		CompletionTokens: 20,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens:         30,
			CachedCreationTokens: 50,
		},
		UsageSemantic: "anthropic",
	}

	converted := buildOpenAIStyleUsageFromClaudeUsage(claudeUsage)

	require.Equal(t, 50, converted.ClaudeCacheCreation5mTokens)
	require.Equal(t, 0, converted.ClaudeCacheCreation1hTokens)
}
func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T) { func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T) {
request := dto.GeneralOpenAIRequest{ request := dto.GeneralOpenAIRequest{
Model: "claude-3-5-sonnet", Model: "claude-3-5-sonnet",
+32 -10
View File
@@ -227,21 +227,31 @@ func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
if oaiUsage == nil { if oaiUsage == nil {
return nil return nil
} }
cacheCreation5m, cacheCreation1h := NormalizeCacheCreationSplit(
oaiUsage.PromptTokensDetails.CachedCreationTokens,
oaiUsage.ClaudeCacheCreation5mTokens,
oaiUsage.ClaudeCacheCreation1hTokens,
)
usage := &dto.ClaudeUsage{ usage := &dto.ClaudeUsage{
InputTokens: oaiUsage.PromptTokens, InputTokens: oaiUsage.PromptTokens,
OutputTokens: oaiUsage.CompletionTokens, OutputTokens: oaiUsage.CompletionTokens,
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens, CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
} }
if oaiUsage.ClaudeCacheCreation5mTokens > 0 || oaiUsage.ClaudeCacheCreation1hTokens > 0 { if cacheCreation5m > 0 || cacheCreation1h > 0 {
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{ usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
Ephemeral5mInputTokens: oaiUsage.ClaudeCacheCreation5mTokens, Ephemeral5mInputTokens: cacheCreation5m,
Ephemeral1hInputTokens: oaiUsage.ClaudeCacheCreation1hTokens, Ephemeral1hInputTokens: cacheCreation1h,
} }
} }
return usage return usage
} }
// NormalizeCacheCreationSplit reconciles an aggregate cache-creation token
// count with its 5m/1h bucket split. Any portion of totalTokens not covered
// by tokens5m + tokens1h is attributed to the 5m bucket (the default TTL);
// the 1h bucket is returned unchanged. If the buckets already meet or exceed
// the aggregate, the split is returned as-is.
func NormalizeCacheCreationSplit(totalTokens int, tokens5m int, tokens1h int) (int, int) {
	// Built-in max (Go 1.21+) avoids the slice allocation of lo.Max.
	remainder := max(totalTokens-tokens5m-tokens1h, 0)
	return tokens5m + remainder, tokens1h
}
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse { func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
if info.ClaudeConvertInfo.Done { if info.ClaudeConvertInfo.Done {
return nil return nil
@@ -426,23 +436,28 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
} }
if len(openAIResponse.Choices) == 0 { if len(openAIResponse.Choices) == 0 {
// no choices // Some OpenAI-compatible upstreams end with a usage-only SSE chunk.
// 可能为非标准的 OpenAI 响应,判断是否已经完成 oaiUsage := openAIResponse.Usage
if info.ClaudeConvertInfo.Done { if oaiUsage == nil {
stopOpenBlocks() oaiUsage = info.ClaudeConvertInfo.Usage
oaiUsage := info.ClaudeConvertInfo.Usage }
if oaiUsage != nil { if oaiUsage != nil {
stopOpenBlocks()
stopReason := stopReasonOpenAI2Claude(info.FinishReason)
if stopReason == "" {
stopReason = "end_turn"
}
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Type: "message_delta", Type: "message_delta",
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage), Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
Delta: &dto.ClaudeMediaMessage{ Delta: &dto.ClaudeMediaMessage{
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)), StopReason: common.GetPointer[string](stopReason),
}, },
}) })
}
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Type: "message_stop", Type: "message_stop",
}) })
info.ClaudeConvertInfo.Done = true
} }
return claudeResponses return claudeResponses
} else { } else {
@@ -450,6 +465,13 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
doneChunk := chosenChoice.FinishReason != nil && *chosenChoice.FinishReason != "" doneChunk := chosenChoice.FinishReason != nil && *chosenChoice.FinishReason != ""
if doneChunk { if doneChunk {
info.FinishReason = *chosenChoice.FinishReason info.FinishReason = *chosenChoice.FinishReason
oaiUsage := openAIResponse.Usage
if oaiUsage == nil {
oaiUsage = info.ClaudeConvertInfo.Usage
// Some upstreams emit finish_reason first, then send a final usage-only chunk.
// Defer closing until usage is available so the final message_delta carries it.
return claudeResponses
}
} }
var claudeResponse dto.ClaudeResponse var claudeResponse dto.ClaudeResponse