diff --git a/relay/channel/claude/message_delta_usage_patch_test.go b/relay/channel/claude/message_delta_usage_patch_test.go
index 43312587..42c2c70c 100644
--- a/relay/channel/claude/message_delta_usage_patch_test.go
+++ b/relay/channel/claude/message_delta_usage_patch_test.go
@@ -85,7 +85,7 @@ func TestBuildMessageDeltaPatchUsage(t *testing.T) {
 		require.EqualValues(t, 50, usage.CacheCreationInputTokens)
 		require.EqualValues(t, 53, usage.OutputTokens)
 		require.NotNil(t, usage.CacheCreation)
-		require.EqualValues(t, 10, usage.CacheCreation.Ephemeral5mInputTokens)
+		require.EqualValues(t, 30, usage.CacheCreation.Ephemeral5mInputTokens)
 		require.EqualValues(t, 20, usage.CacheCreation.Ephemeral1hInputTokens)
 	})
 
@@ -108,4 +108,22 @@ func TestBuildMessageDeltaPatchUsage(t *testing.T) {
 		require.EqualValues(t, 7, usage.CacheReadInputTokens)
 		require.EqualValues(t, 6, usage.CacheCreationInputTokens)
 	})
+
+	t.Run("default aggregate cache creation to 5m when split missing", func(t *testing.T) {
+		claudeResponse := &dto.ClaudeResponse{Usage: &dto.ClaudeUsage{
+			OutputTokens:             53,
+			CacheCreationInputTokens: 50,
+		}}
+		claudeInfo := &ClaudeResponseInfo{Usage: &dto.Usage{
+			PromptTokensDetails: dto.InputTokenDetails{
+				CachedCreationTokens: 50,
+			},
+		}}
+
+		usage := buildMessageDeltaPatchUsage(claudeResponse, claudeInfo)
+		require.NotNil(t, usage)
+		require.NotNil(t, usage.CacheCreation)
+		require.EqualValues(t, 50, usage.CacheCreation.Ephemeral5mInputTokens)
+		require.EqualValues(t, 0, usage.CacheCreation.Ephemeral1hInputTokens)
+	})
 }
diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go
index 645d5c0f..ba97bc90 100644
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -648,6 +648,11 @@ func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage {
 		return dto.Usage{}
 	}
 	clone := *usage
+	clone.ClaudeCacheCreation5mTokens, clone.ClaudeCacheCreation1hTokens = service.NormalizeCacheCreationSplit(
+		usage.PromptTokensDetails.CachedCreationTokens,
+		usage.ClaudeCacheCreation5mTokens,
+		usage.ClaudeCacheCreation1hTokens,
+	)
 	cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage)
 	totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens
 	clone.PromptTokens = totalInputTokens
@@ -677,11 +682,24 @@ func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo
 	if usage.CacheCreationInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens > 0 {
 		usage.CacheCreationInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens
 	}
-	if usage.CacheCreation == nil && (claudeInfo.Usage.ClaudeCacheCreation5mTokens > 0 || claudeInfo.Usage.ClaudeCacheCreation1hTokens > 0) {
-		usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
-			Ephemeral5mInputTokens: claudeInfo.Usage.ClaudeCacheCreation5mTokens,
-			Ephemeral1hInputTokens: claudeInfo.Usage.ClaudeCacheCreation1hTokens,
-		}
+	// Prefer a split already present on usage; otherwise fall back to the one recorded in claudeInfo.
+	cacheCreation5m := claudeInfo.Usage.ClaudeCacheCreation5mTokens
+	cacheCreation1h := claudeInfo.Usage.ClaudeCacheCreation1hTokens
+	if usage.CacheCreation != nil {
+		cacheCreation5m = usage.CacheCreation.Ephemeral5mInputTokens
+		cacheCreation1h = usage.CacheCreation.Ephemeral1hInputTokens
+	}
+	cacheCreation5m, cacheCreation1h = service.NormalizeCacheCreationSplit(
+		usage.CacheCreationInputTokens,
+		cacheCreation5m,
+		cacheCreation1h,
+	)
+	if usage.CacheCreation == nil && (cacheCreation5m > 0 || cacheCreation1h > 0) {
+		usage.CacheCreation = &dto.ClaudeCacheCreationUsage{}
+	}
+	if usage.CacheCreation != nil {
+		usage.CacheCreation.Ephemeral5mInputTokens = cacheCreation5m
+		usage.CacheCreation.Ephemeral1hInputTokens = cacheCreation1h
 	}
 	return usage
 }
diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go
index 6e59da01..fdc7b38e 100644
--- a/relay/channel/claude/relay_claude_test.go
+++ b/relay/channel/claude/relay_claude_test.go
@@ -258,6 +258,23 @@ func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *
 	}
 }
 
+func TestBuildOpenAIStyleUsageFromClaudeUsageDefaultsAggregateCacheCreationTo5m(t *testing.T) {
+	usage := &dto.Usage{
+		PromptTokens:     100,
+		CompletionTokens: 20,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens:         30,
+			CachedCreationTokens: 50,
+		},
+		UsageSemantic: "anthropic",
+	}
+
+	openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(usage)
+
+	require.Equal(t, 50, openAIUsage.ClaudeCacheCreation5mTokens)
+	require.Equal(t, 0, openAIUsage.ClaudeCacheCreation1hTokens)
+}
+
 func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T) {
 	request := dto.GeneralOpenAIRequest{
 		Model: "claude-3-5-sonnet",
diff --git a/service/convert.go b/service/convert.go
index 59d4f8fe..95acf835 100644
--- a/service/convert.go
+++ b/service/convert.go
@@ -227,21 +227,37 @@ func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
 	if oaiUsage == nil {
 		return nil
 	}
+	cacheCreation5m, cacheCreation1h := NormalizeCacheCreationSplit(
+		oaiUsage.PromptTokensDetails.CachedCreationTokens,
+		oaiUsage.ClaudeCacheCreation5mTokens,
+		oaiUsage.ClaudeCacheCreation1hTokens,
+	)
 	usage := &dto.ClaudeUsage{
 		InputTokens:              oaiUsage.PromptTokens,
 		OutputTokens:             oaiUsage.CompletionTokens,
 		CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
 		CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
 	}
-	if oaiUsage.ClaudeCacheCreation5mTokens > 0 || oaiUsage.ClaudeCacheCreation1hTokens > 0 {
+	if cacheCreation5m > 0 || cacheCreation1h > 0 {
 		usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
-			Ephemeral5mInputTokens: oaiUsage.ClaudeCacheCreation5mTokens,
-			Ephemeral1hInputTokens: oaiUsage.ClaudeCacheCreation1hTokens,
+			Ephemeral5mInputTokens: cacheCreation5m,
+			Ephemeral1hInputTokens: cacheCreation1h,
 		}
 	}
 	return usage
 }
 
+// NormalizeCacheCreationSplit reconciles a 5m/1h cache-creation split with the
+// aggregate total. Any part of totalTokens not covered by tokens5m+tokens1h is
+// added to the 5m bucket; tokens1h is always returned unchanged.
+func NormalizeCacheCreationSplit(totalTokens int, tokens5m int, tokens1h int) (int, int) {
+	remainder := totalTokens - tokens5m - tokens1h
+	if remainder < 0 {
+		remainder = 0
+	}
+	return tokens5m + remainder, tokens1h
+}
+
 func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
 	if info.ClaudeConvertInfo.Done {
 		return nil
@@ -426,23 +442,28 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 	}
 
 	if len(openAIResponse.Choices) == 0 {
-		// no choices
-		// 可能为非标准的 OpenAI 响应,判断是否已经完成
-		if info.ClaudeConvertInfo.Done {
+		// Some OpenAI-compatible upstreams end with a usage-only SSE chunk.
+		oaiUsage := openAIResponse.Usage
+		if oaiUsage == nil {
+			oaiUsage = info.ClaudeConvertInfo.Usage
+		}
+		if oaiUsage != nil {
 			stopOpenBlocks()
-			oaiUsage := info.ClaudeConvertInfo.Usage
-			if oaiUsage != nil {
-				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
-					Type:  "message_delta",
-					Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
-					Delta: &dto.ClaudeMediaMessage{
-						StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
-					},
-				})
+			stopReason := stopReasonOpenAI2Claude(info.FinishReason)
+			if stopReason == "" {
+				stopReason = "end_turn"
 			}
+			claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
+				Type:  "message_delta",
+				Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
+				Delta: &dto.ClaudeMediaMessage{
+					StopReason: common.GetPointer[string](stopReason),
+				},
+			})
 			claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
 				Type: "message_stop",
 			})
+			info.ClaudeConvertInfo.Done = true
 		}
 		return claudeResponses
 	} else {
@@ -450,6 +471,13 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 		doneChunk := chosenChoice.FinishReason != nil && *chosenChoice.FinishReason != ""
 		if doneChunk {
 			info.FinishReason = *chosenChoice.FinishReason
+			// Some upstreams emit finish_reason first, then send a final usage-only chunk.
+			// Defer closing until usage is available so the final message_delta carries it.
+			// NOTE(review): nothing visible here closes the stream if that chunk never
+			// arrives; confirm against upstreams that omit usage.
+			if openAIResponse.Usage == nil {
+				return claudeResponses
+			}
 		}
 
 		var claudeResponse dto.ClaudeResponse