117 lines
3.3 KiB
Go
117 lines
3.3 KiB
Go
package service
|
|
|
|
import (
|
|
"github.com/QuantumNous/new-api/dto"
|
|
"github.com/QuantumNous/new-api/pkg/billingexpr"
|
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
|
)
|
|
|
|
// TieredResultWrapper wraps billingexpr.TieredResult for use at the service layer.
|
|
type TieredResultWrapper = billingexpr.TieredResult
|
|
|
|
// BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
|
|
// normalizing P and C so they mean "tokens not separately priced by the
|
|
// expression". Sub-categories (cache, image, audio) are only subtracted
|
|
// when the expression references them via their own variable.
|
|
//
|
|
// GPT-format APIs report prompt_tokens / completion_tokens as totals that
|
|
// include all sub-categories (cache, image, audio). Claude-format APIs
|
|
// report them as text-only. This function normalizes to text-only when
|
|
// sub-categories are separately priced.
|
|
func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
|
|
p := float64(usage.PromptTokens)
|
|
c := float64(usage.CompletionTokens)
|
|
cr := float64(usage.PromptTokensDetails.CachedTokens)
|
|
cc5m := float64(usage.PromptTokensDetails.CachedCreationTokens)
|
|
cc1h := float64(0)
|
|
|
|
if usage.UsageSemantic == "anthropic" {
|
|
cc1h = float64(usage.ClaudeCacheCreation1hTokens)
|
|
cc5m = float64(usage.ClaudeCacheCreation5mTokens)
|
|
}
|
|
|
|
img := float64(usage.PromptTokensDetails.ImageTokens)
|
|
ai := float64(usage.PromptTokensDetails.AudioTokens)
|
|
imgO := float64(usage.CompletionTokenDetails.ImageTokens)
|
|
ao := float64(usage.CompletionTokenDetails.AudioTokens)
|
|
|
|
// len = total input context length for tier condition evaluation.
|
|
// Non-Claude: prompt_tokens already includes everything.
|
|
// Claude: input_tokens is text-only, so add cache read + cache creation.
|
|
inputLen := p
|
|
if isClaudeUsageSemantic {
|
|
inputLen = p + cr + cc5m + cc1h
|
|
}
|
|
|
|
if !isClaudeUsageSemantic {
|
|
if usedVars["cr"] {
|
|
p -= cr
|
|
}
|
|
if usedVars["cc"] {
|
|
p -= cc5m
|
|
}
|
|
if usedVars["cc1h"] {
|
|
p -= cc1h
|
|
}
|
|
if usedVars["img"] {
|
|
p -= img
|
|
}
|
|
if usedVars["ai"] {
|
|
p -= ai
|
|
}
|
|
if usedVars["img_o"] {
|
|
c -= imgO
|
|
}
|
|
if usedVars["ao"] {
|
|
c -= ao
|
|
}
|
|
}
|
|
|
|
if p < 0 {
|
|
p = 0
|
|
}
|
|
if c < 0 {
|
|
c = 0
|
|
}
|
|
|
|
return billingexpr.TokenParams{
|
|
P: p,
|
|
C: c,
|
|
Len: inputLen,
|
|
CR: cr,
|
|
CC: cc5m,
|
|
CC1h: cc1h,
|
|
Img: img,
|
|
ImgO: imgO,
|
|
AI: ai,
|
|
AO: ao,
|
|
}
|
|
}
|
|
|
|
// TryTieredSettle checks if the request uses tiered_expr billing and, if so,
|
|
// computes the actual quota using the frozen BillingSnapshot. Returns:
|
|
// - ok=true, quota, result when tiered billing applies
|
|
// - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
|
|
func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
|
|
snap := relayInfo.TieredBillingSnapshot
|
|
if snap == nil || snap.BillingMode != "tiered_expr" {
|
|
return false, 0, nil
|
|
}
|
|
|
|
requestInput := billingexpr.RequestInput{}
|
|
if relayInfo.BillingRequestInput != nil {
|
|
requestInput = *relayInfo.BillingRequestInput
|
|
}
|
|
|
|
tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
|
|
if err != nil {
|
|
quota = relayInfo.FinalPreConsumedQuota
|
|
if quota <= 0 {
|
|
quota = snap.EstimatedQuotaAfterGroup
|
|
}
|
|
return true, quota, nil
|
|
}
|
|
|
|
return true, tr.ActualQuotaAfterGroup, &tr
|
|
}
|