From 47d7bca268494d66cd80f7200e8e29c1ae335feb Mon Sep 17 00:00:00 2001 From: papersnake Date: Fri, 17 Apr 2026 13:52:34 +0800 Subject: [PATCH] feat: support claude-opus-4-7 (#4293) * feat: support claude-opus-4-7 * feat: summarized display for opus 4.7 --- dto/claude.go | 5 +++ relay/channel/aws/constants.go | 6 ++++ relay/channel/claude/constants.go | 7 ++++ relay/channel/claude/relay-claude.go | 51 +++++++++++++++++++--------- relay/channel/vertex/adaptor.go | 1 + relay/claude_handler.go | 45 +++++++++++++++++------- setting/ratio_setting/cache_ratio.go | 14 ++++++++ setting/ratio_setting/model_ratio.go | 6 ++++ setting/reasoning/suffix.go | 2 +- 9 files changed, 107 insertions(+), 30 deletions(-) diff --git a/dto/claude.go b/dto/claude.go index b35d6565..d7fed412 100644 --- a/dto/claude.go +++ b/dto/claude.go @@ -448,6 +448,11 @@ func ProcessTools(tools []any) ([]*Tool, []*ClaudeWebSearchTool) { type Thinking struct { Type string `json:"type,omitempty"` BudgetTokens *int `json:"budget_tokens,omitempty"` + // Display controls whether thinking content is returned in the response. + // Used with adaptive thinking on Claude Opus 4.7+: "summarized" restores + // the visible summary that was default on Opus 4.6; "omitted" (default on + // 4.7) suppresses it. Pass-through field from upstream Anthropic API. + Display string `json:"display,omitempty"` } func (c *Thinking) GetBudgetTokens() int { diff --git a/relay/channel/aws/constants.go b/relay/channel/aws/constants.go index 55f87ecf..ff1f377e 100644 --- a/relay/channel/aws/constants.go +++ b/relay/channel/aws/constants.go @@ -18,6 +18,7 @@ var awsModelIDMap = map[string]string{ "claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0", "claude-opus-4-5-20251101": "anthropic.claude-opus-4-5-20251101-v1:0", "claude-opus-4-6": "anthropic.claude-opus-4-6-v1", + "claude-opus-4-7": "anthropic.claude-opus-4-7", // Nova models "nova-micro-v1:0": "amazon.nova-micro-v1:0", "nova-lite-v1:0": "amazon.nova-lite-v1:0", @@ -91,6 +92,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{ "ap": true, "eu": true, }, + "anthropic.claude-opus-4-7": { + "us": true, + "ap": true, + "eu": true, + }, "anthropic.claude-haiku-4-5-20251001-v1:0": { "us": true, "ap": true, diff --git a/relay/channel/claude/constants.go b/relay/channel/claude/constants.go index 1a3fac56..3c516aef 100644 --- a/relay/channel/claude/constants.go +++ b/relay/channel/claude/constants.go @@ -26,6 +26,13 @@ var ModelList = []string{ "claude-opus-4-6-medium", "claude-opus-4-6-low", "claude-sonnet-4-6", + "claude-opus-4-7", + "claude-opus-4-7-max", + "claude-opus-4-7-xhigh", + "claude-opus-4-7-high", + "claude-opus-4-7-medium", + "claude-opus-4-7-low", + "claude-opus-4-7-thinking", } var ChannelName = "claude" diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go index e7f351b7..fa823452 100644 --- a/relay/channel/claude/relay-claude.go +++ b/relay/channel/claude/relay-claude.go @@ -154,33 +154,52 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe } if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" && - strings.HasPrefix(textRequest.Model, "claude-opus-4-6") { + (strings.HasPrefix(textRequest.Model, "claude-opus-4-6") || strings.HasPrefix(textRequest.Model, "claude-opus-4-7")) { claudeRequest.Model = baseModel claudeRequest.Thinking = &dto.Thinking{ Type: "adaptive", } claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel)) - claudeRequest.TopP = nil - claudeRequest.Temperature = common.GetPointer[float64](1.0) + if strings.HasPrefix(baseModel, "claude-opus-4-7") { + // Opus 4.7 rejects non-default temperature/top_p/top_k with 400 + // and defaults display to "omitted"; restore the 4.6 visible summary. + claudeRequest.Thinking.Display = "summarized" + claudeRequest.Temperature = nil + claudeRequest.TopP = nil + claudeRequest.TopK = nil + } else { + claudeRequest.TopP = nil + claudeRequest.Temperature = common.GetPointer[float64](1.0) + } } else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled && strings.HasSuffix(textRequest.Model, "-thinking") { - // 因为BudgetTokens 必须大于1024 - if claudeRequest.MaxTokens == nil || *claudeRequest.MaxTokens < 1280 { - claudeRequest.MaxTokens = common.GetPointer[uint](1280) - } + trimmedModel := strings.TrimSuffix(textRequest.Model, "-thinking") + if strings.HasPrefix(trimmedModel, "claude-opus-4-7") { + // Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort. + claudeRequest.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"} + claudeRequest.OutputConfig = json.RawMessage(`{"effort":"high"}`) + claudeRequest.Temperature = nil + claudeRequest.TopP = nil + claudeRequest.TopK = nil + } else { + // 因为BudgetTokens 必须大于1024 + if claudeRequest.MaxTokens == nil || *claudeRequest.MaxTokens < 1280 { + claudeRequest.MaxTokens = common.GetPointer[uint](1280) + } - // BudgetTokens 为 max_tokens 的 80% - claudeRequest.Thinking = &dto.Thinking{ - Type: "enabled", - BudgetTokens: common.GetPointer[int](int(float64(*claudeRequest.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)), + // BudgetTokens 为 max_tokens 的 80% + claudeRequest.Thinking = &dto.Thinking{ + Type: "enabled", + BudgetTokens: common.GetPointer[int](int(float64(*claudeRequest.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)), + } + // TODO: 临时处理 + // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking + claudeRequest.TopP = nil + claudeRequest.Temperature = common.GetPointer[float64](1.0) } - // TODO: 临时处理 - // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking - claudeRequest.TopP = nil - claudeRequest.Temperature = common.GetPointer[float64](1.0) if !model_setting.ShouldPreserveThinkingSuffix(textRequest.Model) { - claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking") + claudeRequest.Model = trimmedModel } } diff --git a/relay/channel/vertex/adaptor.go b/relay/channel/vertex/adaptor.go index 7e56c52b..5cf1fcc0 100644 --- a/relay/channel/vertex/adaptor.go +++ b/relay/channel/vertex/adaptor.go @@ -44,6 +44,7 @@ var claudeModelMap = map[string]string{ "claude-haiku-4-5-20251001": "claude-haiku-4-5@20251001", "claude-opus-4-5-20251101": "claude-opus-4-5@20251101", "claude-opus-4-6": "claude-opus-4-6", + "claude-opus-4-7": "claude-opus-4-7", } const anthropicVersion = "vertex-2023-10-16" diff --git a/relay/claude_handler.go b/relay/claude_handler.go index dc4c93f8..54f8ced2 100644 --- a/relay/claude_handler.go +++ b/relay/claude_handler.go @@ -53,30 +53,49 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ } if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" && - strings.HasPrefix(request.Model, "claude-opus-4-6") { + (strings.HasPrefix(request.Model, "claude-opus-4-6") || strings.HasPrefix(request.Model, "claude-opus-4-7")) { request.Model = baseModel request.Thinking = &dto.Thinking{ Type: "adaptive", } request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel)) - request.Temperature = common.GetPointer[float64](1.0) + if strings.HasPrefix(request.Model, "claude-opus-4-7") { + // Opus 4.7 rejects non-default temperature/top_p/top_k with 400 + // and defaults display to "omitted"; restore the 4.6 visible summary. + request.Thinking.Display = "summarized" + request.Temperature = nil + request.TopP = nil + request.TopK = nil + } else { + request.Temperature = common.GetPointer[float64](1.0) + } info.UpstreamModelName = request.Model } else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled && strings.HasSuffix(request.Model, "-thinking") { if request.Thinking == nil { - // 因为BudgetTokens 必须大于1024 - if request.MaxTokens == nil || *request.MaxTokens < 1280 { - request.MaxTokens = common.GetPointer[uint](1280) - } + baseModel := strings.TrimSuffix(request.Model, "-thinking") + if strings.HasPrefix(baseModel, "claude-opus-4-7") { + // Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort. + request.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"} + request.OutputConfig = json.RawMessage(`{"effort":"high"}`) + request.Temperature = nil + request.TopP = nil + request.TopK = nil + } else { + // 因为BudgetTokens 必须大于1024 + if request.MaxTokens == nil || *request.MaxTokens < 1280 { + request.MaxTokens = common.GetPointer[uint](1280) + } - // BudgetTokens 为 max_tokens 的 80% - request.Thinking = &dto.Thinking{ - Type: "enabled", - BudgetTokens: common.GetPointer[int](int(float64(*request.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)), + // BudgetTokens 为 max_tokens 的 80% + request.Thinking = &dto.Thinking{ + Type: "enabled", + BudgetTokens: common.GetPointer[int](int(float64(*request.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)), + } + // TODO: 临时处理 + // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking + request.Temperature = common.GetPointer[float64](1.0) } - // TODO: 临时处理 - // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking - request.Temperature = common.GetPointer[float64](1.0) } if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) { request.Model = strings.TrimSuffix(request.Model, "-thinking") diff --git a/setting/ratio_setting/cache_ratio.go b/setting/ratio_setting/cache_ratio.go index 2c75ab48..fe6e3b32 100644 --- a/setting/ratio_setting/cache_ratio.go +++ b/setting/ratio_setting/cache_ratio.go @@ -64,6 +64,13 @@ var defaultCacheRatio = map[string]float64{ "claude-opus-4-6-high": 0.1, "claude-opus-4-6-medium": 0.1, "claude-opus-4-6-low": 0.1, + "claude-opus-4-7": 0.1, + "claude-opus-4-7-thinking": 0.1, + "claude-opus-4-7-max": 0.1, + "claude-opus-4-7-xhigh": 0.1, + "claude-opus-4-7-high": 0.1, + "claude-opus-4-7-medium": 0.1, + "claude-opus-4-7-low": 0.1, } var defaultCreateCacheRatio = map[string]float64{ @@ -92,6 +99,13 @@ var defaultCreateCacheRatio = map[string]float64{ "claude-opus-4-6-high": 1.25, "claude-opus-4-6-medium": 1.25, "claude-opus-4-6-low": 1.25, + "claude-opus-4-7": 1.25, + "claude-opus-4-7-thinking": 1.25, + "claude-opus-4-7-max": 1.25, + "claude-opus-4-7-xhigh": 1.25, + "claude-opus-4-7-high": 1.25, + "claude-opus-4-7-medium": 1.25, + "claude-opus-4-7-low": 1.25, } //var defaultCreateCacheRatio = map[string]float64{} diff --git a/setting/ratio_setting/model_ratio.go b/setting/ratio_setting/model_ratio.go index 6899d92c..7556fd94 100644 --- a/setting/ratio_setting/model_ratio.go +++ b/setting/ratio_setting/model_ratio.go @@ -146,6 +146,12 @@ var defaultModelRatio = map[string]float64{ "claude-opus-4-6-high": 2.5, "claude-opus-4-6-medium": 2.5, "claude-opus-4-6-low": 2.5, + "claude-opus-4-7": 2.5, + "claude-opus-4-7-max": 2.5, + "claude-opus-4-7-xhigh": 2.5, + "claude-opus-4-7-high": 2.5, + "claude-opus-4-7-medium": 2.5, + "claude-opus-4-7-low": 2.5, "claude-3-opus-20240229": 7.5, // $15 / 1M tokens "claude-opus-4-20250514": 7.5, "claude-opus-4-1-20250805": 7.5, diff --git a/setting/reasoning/suffix.go b/setting/reasoning/suffix.go index fb66c601..2b95de6d 100644 --- a/setting/reasoning/suffix.go +++ b/setting/reasoning/suffix.go @@ -6,7 +6,7 @@ import ( "github.com/samber/lo" ) -var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"} +var EffortSuffixes = []string{"-max", "-xhigh", "-high", "-medium", "-low", "-minimal"} // TrimEffortSuffix -> modelName level(low) exists func TrimEffortSuffix(modelName string) (string, string, bool) {