Files
new-api/service/tiered_settle.go
T
CaIon c5405b2a12 feat: add billing expression system documentation and enhance tiered billing logic
- Introduced a new rule for the Billing Expression System, emphasizing the importance of reading `pkg/billingexpr/expr.md` for dynamic billing.
- Updated the billing expression logic to support new variables and improved handling of image and audio tokens.
- Enhanced the tiered billing functionality with versioning support for expressions and refined quota calculations.
- Added tests to validate the new billing expression features and ensure correctness in pricing calculations.
2026-03-17 16:59:25 +08:00

99 lines
2.9 KiB
Go

package service
import (
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/pkg/billingexpr"
relaycommon "github.com/QuantumNous/new-api/relay/common"
)
// TieredResultWrapper wraps billingexpr.TieredResult for use at the service layer.
type TieredResultWrapper = billingexpr.TieredResult
// BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
// normalizing P and C so they mean "tokens not separately priced by the
// expression". Sub-categories (cache, image, audio) are only subtracted
// when the expression references them via their own variable.
//
// GPT-format APIs report prompt_tokens / completion_tokens as totals that
// include all sub-categories (cache, image, audio). Claude-format APIs
// report them as text-only. This function normalizes to text-only when
// sub-categories are separately priced.
func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
p := float64(usage.PromptTokens)
c := float64(usage.CompletionTokens)
cr := float64(usage.PromptTokensDetails.CachedTokens)
ccTotal := float64(usage.PromptTokensDetails.CachedCreationTokens)
cc1h := float64(usage.ClaudeCacheCreation1hTokens)
img := float64(usage.PromptTokensDetails.ImageTokens)
ai := float64(usage.PromptTokensDetails.AudioTokens)
imgO := float64(usage.CompletionTokenDetails.ImageTokens)
ao := float64(usage.CompletionTokenDetails.AudioTokens)
if !isClaudeUsageSemantic {
if usedVars["cr"] {
p -= cr
}
if usedVars["cc"] || usedVars["cc1h"] {
p -= ccTotal
}
if usedVars["img"] {
p -= img
}
if usedVars["ai"] {
p -= ai
}
if usedVars["img_o"] {
c -= imgO
}
if usedVars["ao"] {
c -= ao
}
}
if p < 0 {
p = 0
}
if c < 0 {
c = 0
}
return billingexpr.TokenParams{
P: p,
C: c,
CR: cr,
CC: ccTotal - cc1h,
CC1h: cc1h,
Img: img,
ImgO: imgO,
AI: ai,
AO: ao,
}
}
// TryTieredSettle checks if the request uses tiered_expr billing and, if so,
// computes the actual quota using the frozen BillingSnapshot. Returns:
// - ok=true, quota, result when tiered billing applies
// - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
snap := relayInfo.TieredBillingSnapshot
if snap == nil || snap.BillingMode != "tiered_expr" {
return false, 0, nil
}
requestInput := billingexpr.RequestInput{}
if relayInfo.BillingRequestInput != nil {
requestInput = *relayInfo.BillingRequestInput
}
tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
if err != nil {
quota = relayInfo.FinalPreConsumedQuota
if quota <= 0 {
quota = snap.EstimatedQuotaAfterGroup
}
return true, quota, nil
}
return true, tr.ActualQuotaAfterGroup, &tr
}