fix: emit claude message_delta for usage-only final stream chunk
This commit is contained in:
@@ -85,7 +85,7 @@ func TestBuildMessageDeltaPatchUsage(t *testing.T) {
|
|||||||
require.EqualValues(t, 50, usage.CacheCreationInputTokens)
|
require.EqualValues(t, 50, usage.CacheCreationInputTokens)
|
||||||
require.EqualValues(t, 53, usage.OutputTokens)
|
require.EqualValues(t, 53, usage.OutputTokens)
|
||||||
require.NotNil(t, usage.CacheCreation)
|
require.NotNil(t, usage.CacheCreation)
|
||||||
require.EqualValues(t, 10, usage.CacheCreation.Ephemeral5mInputTokens)
|
require.EqualValues(t, 30, usage.CacheCreation.Ephemeral5mInputTokens)
|
||||||
require.EqualValues(t, 20, usage.CacheCreation.Ephemeral1hInputTokens)
|
require.EqualValues(t, 20, usage.CacheCreation.Ephemeral1hInputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -108,4 +108,22 @@ func TestBuildMessageDeltaPatchUsage(t *testing.T) {
|
|||||||
require.EqualValues(t, 7, usage.CacheReadInputTokens)
|
require.EqualValues(t, 7, usage.CacheReadInputTokens)
|
||||||
require.EqualValues(t, 6, usage.CacheCreationInputTokens)
|
require.EqualValues(t, 6, usage.CacheCreationInputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("default aggregate cache creation to 5m when split missing", func(t *testing.T) {
|
||||||
|
claudeResponse := &dto.ClaudeResponse{Usage: &dto.ClaudeUsage{
|
||||||
|
OutputTokens: 53,
|
||||||
|
CacheCreationInputTokens: 50,
|
||||||
|
}}
|
||||||
|
claudeInfo := &ClaudeResponseInfo{Usage: &dto.Usage{
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedCreationTokens: 50,
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
|
||||||
|
usage := buildMessageDeltaPatchUsage(claudeResponse, claudeInfo)
|
||||||
|
require.NotNil(t, usage)
|
||||||
|
require.NotNil(t, usage.CacheCreation)
|
||||||
|
require.EqualValues(t, 50, usage.CacheCreation.Ephemeral5mInputTokens)
|
||||||
|
require.EqualValues(t, 0, usage.CacheCreation.Ephemeral1hInputTokens)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -648,6 +648,11 @@ func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage {
|
|||||||
return dto.Usage{}
|
return dto.Usage{}
|
||||||
}
|
}
|
||||||
clone := *usage
|
clone := *usage
|
||||||
|
clone.ClaudeCacheCreation5mTokens, clone.ClaudeCacheCreation1hTokens = service.NormalizeCacheCreationSplit(
|
||||||
|
usage.PromptTokensDetails.CachedCreationTokens,
|
||||||
|
usage.ClaudeCacheCreation5mTokens,
|
||||||
|
usage.ClaudeCacheCreation1hTokens,
|
||||||
|
)
|
||||||
cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage)
|
cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage)
|
||||||
totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens
|
totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens
|
||||||
clone.PromptTokens = totalInputTokens
|
clone.PromptTokens = totalInputTokens
|
||||||
@@ -677,11 +682,26 @@ func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo
|
|||||||
if usage.CacheCreationInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens > 0 {
|
if usage.CacheCreationInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens > 0 {
|
||||||
usage.CacheCreationInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens
|
usage.CacheCreationInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens
|
||||||
}
|
}
|
||||||
if usage.CacheCreation == nil && (claudeInfo.Usage.ClaudeCacheCreation5mTokens > 0 || claudeInfo.Usage.ClaudeCacheCreation1hTokens > 0) {
|
cacheCreation5m := 0
|
||||||
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
|
cacheCreation1h := 0
|
||||||
Ephemeral5mInputTokens: claudeInfo.Usage.ClaudeCacheCreation5mTokens,
|
if usage.CacheCreation != nil {
|
||||||
Ephemeral1hInputTokens: claudeInfo.Usage.ClaudeCacheCreation1hTokens,
|
cacheCreation5m = usage.CacheCreation.Ephemeral5mInputTokens
|
||||||
}
|
cacheCreation1h = usage.CacheCreation.Ephemeral1hInputTokens
|
||||||
|
} else {
|
||||||
|
cacheCreation5m = claudeInfo.Usage.ClaudeCacheCreation5mTokens
|
||||||
|
cacheCreation1h = claudeInfo.Usage.ClaudeCacheCreation1hTokens
|
||||||
|
}
|
||||||
|
cacheCreation5m, cacheCreation1h = service.NormalizeCacheCreationSplit(
|
||||||
|
usage.CacheCreationInputTokens,
|
||||||
|
cacheCreation5m,
|
||||||
|
cacheCreation1h,
|
||||||
|
)
|
||||||
|
if usage.CacheCreation == nil && (cacheCreation5m > 0 || cacheCreation1h > 0) {
|
||||||
|
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{}
|
||||||
|
}
|
||||||
|
if usage.CacheCreation != nil {
|
||||||
|
usage.CacheCreation.Ephemeral5mInputTokens = cacheCreation5m
|
||||||
|
usage.CacheCreation.Ephemeral1hInputTokens = cacheCreation1h
|
||||||
}
|
}
|
||||||
return usage
|
return usage
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -258,6 +258,23 @@ func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildOpenAIStyleUsageFromClaudeUsageDefaultsAggregateCacheCreationTo5m(t *testing.T) {
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
CompletionTokens: 20,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 30,
|
||||||
|
CachedCreationTokens: 50,
|
||||||
|
},
|
||||||
|
UsageSemantic: "anthropic",
|
||||||
|
}
|
||||||
|
|
||||||
|
openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(usage)
|
||||||
|
|
||||||
|
require.Equal(t, 50, openAIUsage.ClaudeCacheCreation5mTokens)
|
||||||
|
require.Equal(t, 0, openAIUsage.ClaudeCacheCreation1hTokens)
|
||||||
|
}
|
||||||
|
|
||||||
func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T) {
|
func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T) {
|
||||||
request := dto.GeneralOpenAIRequest{
|
request := dto.GeneralOpenAIRequest{
|
||||||
Model: "claude-3-5-sonnet",
|
Model: "claude-3-5-sonnet",
|
||||||
|
|||||||
+37
-15
@@ -227,21 +227,31 @@ func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
|
|||||||
if oaiUsage == nil {
|
if oaiUsage == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
cacheCreation5m, cacheCreation1h := NormalizeCacheCreationSplit(
|
||||||
|
oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||||
|
oaiUsage.ClaudeCacheCreation5mTokens,
|
||||||
|
oaiUsage.ClaudeCacheCreation1hTokens,
|
||||||
|
)
|
||||||
usage := &dto.ClaudeUsage{
|
usage := &dto.ClaudeUsage{
|
||||||
InputTokens: oaiUsage.PromptTokens,
|
InputTokens: oaiUsage.PromptTokens,
|
||||||
OutputTokens: oaiUsage.CompletionTokens,
|
OutputTokens: oaiUsage.CompletionTokens,
|
||||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
||||||
}
|
}
|
||||||
if oaiUsage.ClaudeCacheCreation5mTokens > 0 || oaiUsage.ClaudeCacheCreation1hTokens > 0 {
|
if cacheCreation5m > 0 || cacheCreation1h > 0 {
|
||||||
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
|
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
|
||||||
Ephemeral5mInputTokens: oaiUsage.ClaudeCacheCreation5mTokens,
|
Ephemeral5mInputTokens: cacheCreation5m,
|
||||||
Ephemeral1hInputTokens: oaiUsage.ClaudeCacheCreation1hTokens,
|
Ephemeral1hInputTokens: cacheCreation1h,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return usage
|
return usage
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NormalizeCacheCreationSplit reconciles an aggregate cache-creation token
// count with its 5m and 1h components.
//
// Any part of totalTokens that is not covered by tokens5m + tokens1h is added
// to the 5m count; tokens1h is always returned unchanged. When the two
// components already add up to totalTokens or more, both are returned as
// given.
//
// The first result is the adjusted 5m count, the second is the 1h count.
func NormalizeCacheCreationSplit(totalTokens int, tokens5m int, tokens1h int) (int, int) {
	// Clamp at zero so a split that exceeds the aggregate never shrinks the
	// 5m count.
	remainder := totalTokens - tokens5m - tokens1h
	if remainder < 0 {
		remainder = 0
	}
	return tokens5m + remainder, tokens1h
}
|
||||||
|
|
||||||
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
|
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
|
||||||
if info.ClaudeConvertInfo.Done {
|
if info.ClaudeConvertInfo.Done {
|
||||||
return nil
|
return nil
|
||||||
@@ -426,23 +436,28 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(openAIResponse.Choices) == 0 {
|
if len(openAIResponse.Choices) == 0 {
|
||||||
// no choices
|
// Some OpenAI-compatible upstreams end with a usage-only SSE chunk.
|
||||||
// 可能为非标准的 OpenAI 响应,判断是否已经完成
|
oaiUsage := openAIResponse.Usage
|
||||||
if info.ClaudeConvertInfo.Done {
|
if oaiUsage == nil {
|
||||||
|
oaiUsage = info.ClaudeConvertInfo.Usage
|
||||||
|
}
|
||||||
|
if oaiUsage != nil {
|
||||||
stopOpenBlocks()
|
stopOpenBlocks()
|
||||||
oaiUsage := info.ClaudeConvertInfo.Usage
|
stopReason := stopReasonOpenAI2Claude(info.FinishReason)
|
||||||
if oaiUsage != nil {
|
if stopReason == "" {
|
||||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
stopReason = "end_turn"
|
||||||
Type: "message_delta",
|
|
||||||
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
|
||||||
Delta: &dto.ClaudeMediaMessage{
|
|
||||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||||
|
Type: "message_delta",
|
||||||
|
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||||
|
Delta: &dto.ClaudeMediaMessage{
|
||||||
|
StopReason: common.GetPointer[string](stopReason),
|
||||||
|
},
|
||||||
|
})
|
||||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||||
Type: "message_stop",
|
Type: "message_stop",
|
||||||
})
|
})
|
||||||
|
info.ClaudeConvertInfo.Done = true
|
||||||
}
|
}
|
||||||
return claudeResponses
|
return claudeResponses
|
||||||
} else {
|
} else {
|
||||||
@@ -450,6 +465,13 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
|||||||
doneChunk := chosenChoice.FinishReason != nil && *chosenChoice.FinishReason != ""
|
doneChunk := chosenChoice.FinishReason != nil && *chosenChoice.FinishReason != ""
|
||||||
if doneChunk {
|
if doneChunk {
|
||||||
info.FinishReason = *chosenChoice.FinishReason
|
info.FinishReason = *chosenChoice.FinishReason
|
||||||
|
oaiUsage := openAIResponse.Usage
|
||||||
|
if oaiUsage == nil {
|
||||||
|
oaiUsage = info.ClaudeConvertInfo.Usage
|
||||||
|
// Some upstreams emit finish_reason first, then send a final usage-only chunk.
|
||||||
|
// Defer closing until usage is available so the final message_delta carries it.
|
||||||
|
return claudeResponses
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var claudeResponse dto.ClaudeResponse
|
var claudeResponse dto.ClaudeResponse
|
||||||
|
|||||||
Reference in New Issue
Block a user