feat: support claude-opus-4-7 (#4293)
* feat: support claude-opus-4-7
* feat: summarized display for opus 4.7
This commit is contained in:
@@ -448,6 +448,11 @@ func ProcessTools(tools []any) ([]*Tool, []*ClaudeWebSearchTool) {
|
|||||||
type Thinking struct {
|
type Thinking struct {
|
||||||
Type string `json:"type,omitempty"`
|
Type string `json:"type,omitempty"`
|
||||||
BudgetTokens *int `json:"budget_tokens,omitempty"`
|
BudgetTokens *int `json:"budget_tokens,omitempty"`
|
||||||
|
// Display controls whether thinking content is returned in the response.
|
||||||
|
// Used with adaptive thinking on Claude Opus 4.7+: "summarized" restores
|
||||||
|
// the visible summary that was default on Opus 4.6; "omitted" (default on
|
||||||
|
// 4.7) suppresses it. Pass-through field from upstream Anthropic API.
|
||||||
|
Display string `json:"display,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Thinking) GetBudgetTokens() int {
|
func (c *Thinking) GetBudgetTokens() int {
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ var awsModelIDMap = map[string]string{
|
|||||||
"claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
|
"claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
|
||||||
"claude-opus-4-5-20251101": "anthropic.claude-opus-4-5-20251101-v1:0",
|
"claude-opus-4-5-20251101": "anthropic.claude-opus-4-5-20251101-v1:0",
|
||||||
"claude-opus-4-6": "anthropic.claude-opus-4-6-v1",
|
"claude-opus-4-6": "anthropic.claude-opus-4-6-v1",
|
||||||
|
"claude-opus-4-7": "anthropic.claude-opus-4-7",
|
||||||
// Nova models
|
// Nova models
|
||||||
"nova-micro-v1:0": "amazon.nova-micro-v1:0",
|
"nova-micro-v1:0": "amazon.nova-micro-v1:0",
|
||||||
"nova-lite-v1:0": "amazon.nova-lite-v1:0",
|
"nova-lite-v1:0": "amazon.nova-lite-v1:0",
|
||||||
@@ -91,6 +92,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{
|
|||||||
"ap": true,
|
"ap": true,
|
||||||
"eu": true,
|
"eu": true,
|
||||||
},
|
},
|
||||||
|
"anthropic.claude-opus-4-7": {
|
||||||
|
"us": true,
|
||||||
|
"ap": true,
|
||||||
|
"eu": true,
|
||||||
|
},
|
||||||
"anthropic.claude-haiku-4-5-20251001-v1:0": {
|
"anthropic.claude-haiku-4-5-20251001-v1:0": {
|
||||||
"us": true,
|
"us": true,
|
||||||
"ap": true,
|
"ap": true,
|
||||||
|
|||||||
@@ -26,6 +26,13 @@ var ModelList = []string{
|
|||||||
"claude-opus-4-6-medium",
|
"claude-opus-4-6-medium",
|
||||||
"claude-opus-4-6-low",
|
"claude-opus-4-6-low",
|
||||||
"claude-sonnet-4-6",
|
"claude-sonnet-4-6",
|
||||||
|
"claude-opus-4-7",
|
||||||
|
"claude-opus-4-7-max",
|
||||||
|
"claude-opus-4-7-xhigh",
|
||||||
|
"claude-opus-4-7-high",
|
||||||
|
"claude-opus-4-7-medium",
|
||||||
|
"claude-opus-4-7-low",
|
||||||
|
"claude-opus-4-7-thinking",
|
||||||
}
|
}
|
||||||
|
|
||||||
var ChannelName = "claude"
|
var ChannelName = "claude"
|
||||||
|
|||||||
@@ -154,33 +154,52 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
|
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
|
||||||
strings.HasPrefix(textRequest.Model, "claude-opus-4-6") {
|
(strings.HasPrefix(textRequest.Model, "claude-opus-4-6") || strings.HasPrefix(textRequest.Model, "claude-opus-4-7")) {
|
||||||
claudeRequest.Model = baseModel
|
claudeRequest.Model = baseModel
|
||||||
claudeRequest.Thinking = &dto.Thinking{
|
claudeRequest.Thinking = &dto.Thinking{
|
||||||
Type: "adaptive",
|
Type: "adaptive",
|
||||||
}
|
}
|
||||||
claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
|
claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
|
||||||
claudeRequest.TopP = nil
|
if strings.HasPrefix(baseModel, "claude-opus-4-7") {
|
||||||
claudeRequest.Temperature = common.GetPointer[float64](1.0)
|
// Opus 4.7 rejects non-default temperature/top_p/top_k with 400
|
||||||
|
// and defaults display to "omitted"; restore the 4.6 visible summary.
|
||||||
|
claudeRequest.Thinking.Display = "summarized"
|
||||||
|
claudeRequest.Temperature = nil
|
||||||
|
claudeRequest.TopP = nil
|
||||||
|
claudeRequest.TopK = nil
|
||||||
|
} else {
|
||||||
|
claudeRequest.TopP = nil
|
||||||
|
claudeRequest.Temperature = common.GetPointer[float64](1.0)
|
||||||
|
}
|
||||||
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
||||||
strings.HasSuffix(textRequest.Model, "-thinking") {
|
strings.HasSuffix(textRequest.Model, "-thinking") {
|
||||||
|
|
||||||
// 因为BudgetTokens 必须大于1024
|
trimmedModel := strings.TrimSuffix(textRequest.Model, "-thinking")
|
||||||
if claudeRequest.MaxTokens == nil || *claudeRequest.MaxTokens < 1280 {
|
if strings.HasPrefix(trimmedModel, "claude-opus-4-7") {
|
||||||
claudeRequest.MaxTokens = common.GetPointer[uint](1280)
|
// Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort.
|
||||||
}
|
claudeRequest.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"}
|
||||||
|
claudeRequest.OutputConfig = json.RawMessage(`{"effort":"high"}`)
|
||||||
|
claudeRequest.Temperature = nil
|
||||||
|
claudeRequest.TopP = nil
|
||||||
|
claudeRequest.TopK = nil
|
||||||
|
} else {
|
||||||
|
// 因为BudgetTokens 必须大于1024
|
||||||
|
if claudeRequest.MaxTokens == nil || *claudeRequest.MaxTokens < 1280 {
|
||||||
|
claudeRequest.MaxTokens = common.GetPointer[uint](1280)
|
||||||
|
}
|
||||||
|
|
||||||
// BudgetTokens 为 max_tokens 的 80%
|
// BudgetTokens 为 max_tokens 的 80%
|
||||||
claudeRequest.Thinking = &dto.Thinking{
|
claudeRequest.Thinking = &dto.Thinking{
|
||||||
Type: "enabled",
|
Type: "enabled",
|
||||||
BudgetTokens: common.GetPointer[int](int(float64(*claudeRequest.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)),
|
BudgetTokens: common.GetPointer[int](int(float64(*claudeRequest.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)),
|
||||||
|
}
|
||||||
|
// TODO: 临时处理
|
||||||
|
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
|
||||||
|
claudeRequest.TopP = nil
|
||||||
|
claudeRequest.Temperature = common.GetPointer[float64](1.0)
|
||||||
}
|
}
|
||||||
// TODO: 临时处理
|
|
||||||
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
|
|
||||||
claudeRequest.TopP = nil
|
|
||||||
claudeRequest.Temperature = common.GetPointer[float64](1.0)
|
|
||||||
if !model_setting.ShouldPreserveThinkingSuffix(textRequest.Model) {
|
if !model_setting.ShouldPreserveThinkingSuffix(textRequest.Model) {
|
||||||
claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
|
claudeRequest.Model = trimmedModel
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ var claudeModelMap = map[string]string{
|
|||||||
"claude-haiku-4-5-20251001": "claude-haiku-4-5@20251001",
|
"claude-haiku-4-5-20251001": "claude-haiku-4-5@20251001",
|
||||||
"claude-opus-4-5-20251101": "claude-opus-4-5@20251101",
|
"claude-opus-4-5-20251101": "claude-opus-4-5@20251101",
|
||||||
"claude-opus-4-6": "claude-opus-4-6",
|
"claude-opus-4-6": "claude-opus-4-6",
|
||||||
|
"claude-opus-4-7": "claude-opus-4-7",
|
||||||
}
|
}
|
||||||
|
|
||||||
const anthropicVersion = "vertex-2023-10-16"
|
const anthropicVersion = "vertex-2023-10-16"
|
||||||
|
|||||||
+32
-13
@@ -53,30 +53,49 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
|
|||||||
}
|
}
|
||||||
|
|
||||||
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
|
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
|
||||||
strings.HasPrefix(request.Model, "claude-opus-4-6") {
|
(strings.HasPrefix(request.Model, "claude-opus-4-6") || strings.HasPrefix(request.Model, "claude-opus-4-7")) {
|
||||||
request.Model = baseModel
|
request.Model = baseModel
|
||||||
request.Thinking = &dto.Thinking{
|
request.Thinking = &dto.Thinking{
|
||||||
Type: "adaptive",
|
Type: "adaptive",
|
||||||
}
|
}
|
||||||
request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
|
request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
|
||||||
request.Temperature = common.GetPointer[float64](1.0)
|
if strings.HasPrefix(request.Model, "claude-opus-4-7") {
|
||||||
|
// Opus 4.7 rejects non-default temperature/top_p/top_k with 400
|
||||||
|
// and defaults display to "omitted"; restore the 4.6 visible summary.
|
||||||
|
request.Thinking.Display = "summarized"
|
||||||
|
request.Temperature = nil
|
||||||
|
request.TopP = nil
|
||||||
|
request.TopK = nil
|
||||||
|
} else {
|
||||||
|
request.Temperature = common.GetPointer[float64](1.0)
|
||||||
|
}
|
||||||
info.UpstreamModelName = request.Model
|
info.UpstreamModelName = request.Model
|
||||||
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
|
||||||
strings.HasSuffix(request.Model, "-thinking") {
|
strings.HasSuffix(request.Model, "-thinking") {
|
||||||
if request.Thinking == nil {
|
if request.Thinking == nil {
|
||||||
// 因为BudgetTokens 必须大于1024
|
baseModel := strings.TrimSuffix(request.Model, "-thinking")
|
||||||
if request.MaxTokens == nil || *request.MaxTokens < 1280 {
|
if strings.HasPrefix(baseModel, "claude-opus-4-7") {
|
||||||
request.MaxTokens = common.GetPointer[uint](1280)
|
// Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort.
|
||||||
}
|
request.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"}
|
||||||
|
request.OutputConfig = json.RawMessage(`{"effort":"high"}`)
|
||||||
|
request.Temperature = nil
|
||||||
|
request.TopP = nil
|
||||||
|
request.TopK = nil
|
||||||
|
} else {
|
||||||
|
// 因为BudgetTokens 必须大于1024
|
||||||
|
if request.MaxTokens == nil || *request.MaxTokens < 1280 {
|
||||||
|
request.MaxTokens = common.GetPointer[uint](1280)
|
||||||
|
}
|
||||||
|
|
||||||
// BudgetTokens 为 max_tokens 的 80%
|
// BudgetTokens 为 max_tokens 的 80%
|
||||||
request.Thinking = &dto.Thinking{
|
request.Thinking = &dto.Thinking{
|
||||||
Type: "enabled",
|
Type: "enabled",
|
||||||
BudgetTokens: common.GetPointer[int](int(float64(*request.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)),
|
BudgetTokens: common.GetPointer[int](int(float64(*request.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage)),
|
||||||
|
}
|
||||||
|
// TODO: 临时处理
|
||||||
|
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
|
||||||
|
request.Temperature = common.GetPointer[float64](1.0)
|
||||||
}
|
}
|
||||||
// TODO: 临时处理
|
|
||||||
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
|
|
||||||
request.Temperature = common.GetPointer[float64](1.0)
|
|
||||||
}
|
}
|
||||||
if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
|
if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
|
||||||
request.Model = strings.TrimSuffix(request.Model, "-thinking")
|
request.Model = strings.TrimSuffix(request.Model, "-thinking")
|
||||||
|
|||||||
@@ -64,6 +64,13 @@ var defaultCacheRatio = map[string]float64{
|
|||||||
"claude-opus-4-6-high": 0.1,
|
"claude-opus-4-6-high": 0.1,
|
||||||
"claude-opus-4-6-medium": 0.1,
|
"claude-opus-4-6-medium": 0.1,
|
||||||
"claude-opus-4-6-low": 0.1,
|
"claude-opus-4-6-low": 0.1,
|
||||||
|
"claude-opus-4-7": 0.1,
|
||||||
|
"claude-opus-4-7-thinking": 0.1,
|
||||||
|
"claude-opus-4-7-max": 0.1,
|
||||||
|
"claude-opus-4-7-xhigh": 0.1,
|
||||||
|
"claude-opus-4-7-high": 0.1,
|
||||||
|
"claude-opus-4-7-medium": 0.1,
|
||||||
|
"claude-opus-4-7-low": 0.1,
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultCreateCacheRatio = map[string]float64{
|
var defaultCreateCacheRatio = map[string]float64{
|
||||||
@@ -92,6 +99,13 @@ var defaultCreateCacheRatio = map[string]float64{
|
|||||||
"claude-opus-4-6-high": 1.25,
|
"claude-opus-4-6-high": 1.25,
|
||||||
"claude-opus-4-6-medium": 1.25,
|
"claude-opus-4-6-medium": 1.25,
|
||||||
"claude-opus-4-6-low": 1.25,
|
"claude-opus-4-6-low": 1.25,
|
||||||
|
"claude-opus-4-7": 1.25,
|
||||||
|
"claude-opus-4-7-thinking": 1.25,
|
||||||
|
"claude-opus-4-7-max": 1.25,
|
||||||
|
"claude-opus-4-7-xhigh": 1.25,
|
||||||
|
"claude-opus-4-7-high": 1.25,
|
||||||
|
"claude-opus-4-7-medium": 1.25,
|
||||||
|
"claude-opus-4-7-low": 1.25,
|
||||||
}
|
}
|
||||||
|
|
||||||
//var defaultCreateCacheRatio = map[string]float64{}
|
//var defaultCreateCacheRatio = map[string]float64{}
|
||||||
|
|||||||
@@ -146,6 +146,12 @@ var defaultModelRatio = map[string]float64{
|
|||||||
"claude-opus-4-6-high": 2.5,
|
"claude-opus-4-6-high": 2.5,
|
||||||
"claude-opus-4-6-medium": 2.5,
|
"claude-opus-4-6-medium": 2.5,
|
||||||
"claude-opus-4-6-low": 2.5,
|
"claude-opus-4-6-low": 2.5,
|
||||||
|
"claude-opus-4-7": 2.5,
|
||||||
|
"claude-opus-4-7-max": 2.5,
|
||||||
|
"claude-opus-4-7-xhigh": 2.5,
|
||||||
|
"claude-opus-4-7-high": 2.5,
|
||||||
|
"claude-opus-4-7-medium": 2.5,
|
||||||
|
"claude-opus-4-7-low": 2.5,
|
||||||
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
||||||
"claude-opus-4-20250514": 7.5,
|
"claude-opus-4-20250514": 7.5,
|
||||||
"claude-opus-4-1-20250805": 7.5,
|
"claude-opus-4-1-20250805": 7.5,
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import (
|
|||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
)
|
)
|
||||||
|
|
||||||
var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"}
|
var EffortSuffixes = []string{"-max", "-xhigh", "-high", "-medium", "-low", "-minimal"}
|
||||||
|
|
||||||
// TrimEffortSuffix -> modelName level(low) exists
|
// TrimEffortSuffix -> modelName level(low) exists
|
||||||
func TrimEffortSuffix(modelName string) (string, string, bool) {
|
func TrimEffortSuffix(modelName string) (string, string, bool) {
|
||||||
|
|||||||
Reference in New Issue
Block a user