refactor: update billing calculations to use quota per unit

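- Adjusted billing calculations in tests and core logic to incorporate a new QuotaPerUnit field on BillingSnapshot: tiered settlement now converts the expression output to quota as cost / 1_000_000 * QuotaPerUnit before applying the group ratio.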
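- Modified estimated (pre-consume) quota calculations to reflect changes in tiered billing logic: the raw expression output is converted via rawCost / 1_000_000 * common.QuotaPerUnit, and the snapshot stores the converted value together with the QuotaPerUnit that was used.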
- Updated related tests to ensure accuracy with the new quota calculations.
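- Updated dynamic pricing components to align with updated billing expressions: expression coefficients are now displayed directly as $/1M-token prices (the former x2 display factor is removed and the built-in presets are doubled to compensate), and the pricing card summary is scaled by the group ratio in use.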
This commit is contained in:
CaIon
2026-03-16 20:11:55 +08:00
parent f0589cc478
commit f6c0852da9
10 changed files with 160 additions and 132 deletions
+75 -59
View File
@@ -313,9 +313,10 @@ func TestComputeTieredQuota_Basic(t *testing.T) {
GroupRatio: 1.0,
EstimatedPromptTokens: 100000,
EstimatedCompletionTokens: 5000,
EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound(100000*1.5 + 5000*7.5),
EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000),
EstimatedTier: "standard",
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 300000, C: 10000})
@@ -323,7 +324,7 @@ func TestComputeTieredQuota_Basic(t *testing.T) {
t.Fatal(err)
}
wantBefore := 300000*3.0 + 10000*11.25
wantBefore := (300000*3.0 + 10000*11.25) / 1_000_000 * 500_000
if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 {
t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore)
}
@@ -343,9 +344,10 @@ func TestComputeTieredQuota_SameTier(t *testing.T) {
GroupRatio: 1.5,
EstimatedPromptTokens: 50000,
EstimatedCompletionTokens: 1000,
EstimatedQuotaBeforeGroup: 50000*1.5 + 1000*7.5,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound((50000*1.5 + 1000*7.5) * 1.5),
EstimatedQuotaBeforeGroup: (50000*1.5 + 1000*7.5) / 1_000_000 * 500_000,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound((50000*1.5 + 1000*7.5) / 1_000_000 * 500_000 * 1.5),
EstimatedTier: "standard",
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 80000, C: 2000})
@@ -353,7 +355,7 @@ func TestComputeTieredQuota_SameTier(t *testing.T) {
t.Fatal(err)
}
wantBefore := 80000*1.5 + 2000*7.5
wantBefore := (80000*1.5 + 2000*7.5) / 1_000_000 * 500_000
wantAfter := billingexpr.QuotaRound(wantBefore * 1.5)
if result.ActualQuotaAfterGroup != wantAfter {
t.Errorf("after group: got %d, want %d", result.ActualQuotaAfterGroup, wantAfter)
@@ -534,9 +536,10 @@ func TestComputeTieredQuota_WithCache(t *testing.T) {
GroupRatio: 1.0,
EstimatedPromptTokens: 100000,
EstimatedCompletionTokens: 5000,
EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound(100000*1.5 + 5000*7.5),
EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000),
EstimatedTier: "standard",
QuotaPerUnit: 500_000,
}
params := billingexpr.TokenParams{P: 100000, C: 5000, CR: 50000, CC: 10000}
@@ -545,7 +548,7 @@ func TestComputeTieredQuota_WithCache(t *testing.T) {
t.Fatal(err)
}
wantBefore := 100000*1.5 + 5000*7.5 + 50000*0.15 + 10000*1.875
wantBefore := (100000*1.5 + 5000*7.5 + 50000*0.15 + 10000*1.875) / 1_000_000 * 500_000
if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 {
t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore)
}
@@ -565,9 +568,10 @@ func TestComputeTieredQuota_WithCacheCrossTier(t *testing.T) {
GroupRatio: 2.0,
EstimatedPromptTokens: 100000,
EstimatedCompletionTokens: 5000,
EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) * 2.0),
EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000 * 2.0),
EstimatedTier: "standard",
QuotaPerUnit: 500_000,
}
params := billingexpr.TokenParams{P: 300000, C: 10000, CR: 50000, CC: 10000}
@@ -576,7 +580,7 @@ func TestComputeTieredQuota_WithCacheCrossTier(t *testing.T) {
t.Fatal(err)
}
wantBefore := 300000*3.0 + 10000*11.25 + 50000*0.3 + 10000*3.75
wantBefore := (300000*3.0 + 10000*11.25 + 50000*0.3 + 10000*3.75) / 1_000_000 * 500_000
wantAfter := billingexpr.QuotaRound(wantBefore * 2.0)
if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 {
t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore)
@@ -646,6 +650,7 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
estCost, estTrace, _ := billingexpr.RunExpr(claudeWithCacheExpr, estParams)
const qpu = 500_000.0
snap := &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
ExprString: claudeWithCacheExpr,
@@ -653,9 +658,10 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
GroupRatio: groupRatio,
EstimatedPromptTokens: int(estParams.P),
EstimatedCompletionTokens: int(estParams.C),
EstimatedQuotaBeforeGroup: estCost,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound(estCost * groupRatio),
EstimatedQuotaBeforeGroup: estCost / 1_000_000 * qpu,
EstimatedQuotaAfterGroup: billingexpr.QuotaRound(estCost / 1_000_000 * qpu * groupRatio),
EstimatedTier: estTrace.MatchedTier,
QuotaPerUnit: qpu,
}
result, err := billingexpr.ComputeTieredQuota(snap, actParams)
@@ -664,7 +670,7 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
}
directCost, _, _ := billingexpr.RunExpr(claudeWithCacheExpr, actParams)
directQuota := billingexpr.QuotaRound(directCost * groupRatio)
directQuota := billingexpr.QuotaRound(directCost / 1_000_000 * qpu * groupRatio)
if result.ActualQuotaAfterGroup != directQuota {
t.Errorf("iter %d: settlement %d != direct %d", i, result.ActualQuotaAfterGroup, directQuota)
@@ -679,21 +685,23 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
func TestComputeTieredQuota_BasicSettlement(t *testing.T) {
exprStr := `tier("default", p + c)`
snap := &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3000, C: 2000})
if err != nil {
t.Fatal(err)
}
if math.Abs(result.ActualQuotaBeforeGroup-5000) > 1e-6 {
t.Errorf("before group = %f, want 5000", result.ActualQuotaBeforeGroup)
// exprOutput = 5000; quota = 5000 / 1M * 500K = 2500
if math.Abs(result.ActualQuotaBeforeGroup-2500) > 1e-6 {
t.Errorf("before group = %f, want 2500", result.ActualQuotaBeforeGroup)
}
if result.ActualQuotaAfterGroup != 5000 {
t.Errorf("after group = %d, want 5000", result.ActualQuotaAfterGroup)
if result.ActualQuotaAfterGroup != 2500 {
t.Errorf("after group = %d, want 2500", result.ActualQuotaAfterGroup)
}
if result.MatchedTier != "default" {
t.Errorf("tier = %q, want default", result.MatchedTier)
@@ -703,29 +711,31 @@ func TestComputeTieredQuota_BasicSettlement(t *testing.T) {
func TestComputeTieredQuota_WithGroupRatio(t *testing.T) {
exprStr := `tier("default", p + c)`
snap := &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 2.0,
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 2.0,
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 1000, C: 500})
if err != nil {
t.Fatal(err)
}
// cost = 1500, after group = round(1500 * 2.0) = 3000
if result.ActualQuotaAfterGroup != 3000 {
t.Errorf("after group = %d, want 3000", result.ActualQuotaAfterGroup)
// exprOutput = 1500; quotaBeforeGroup = 750; afterGroup = round(750 * 2.0) = 1500
if result.ActualQuotaAfterGroup != 1500 {
t.Errorf("after group = %d, want 1500", result.ActualQuotaAfterGroup)
}
}
func TestComputeTieredQuota_ZeroTokens(t *testing.T) {
exprStr := `tier("default", p * 2 + c * 10)`
snap := &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{})
@@ -738,40 +748,42 @@ func TestComputeTieredQuota_ZeroTokens(t *testing.T) {
}
func TestComputeTieredQuota_RoundingEdge(t *testing.T) {
exprStr := `tier("default", p * 0.5)` // 3 * 0.5 = 1.5 -> round to 2
exprStr := `tier("default", p * 0.5)` // 3 * 0.5 = 1.5 (expr); 1.5 / 1M * 500K = 0.75; round(0.75) = 1
snap := &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3})
if err != nil {
t.Fatal(err)
}
// 3 * 0.5 = 1.5, round(1.5) = 2
if result.ActualQuotaAfterGroup != 2 {
t.Errorf("after group = %d, want 2 (round 1.5 up)", result.ActualQuotaAfterGroup)
// 3 * 0.5 = 1.5 (expr); quota = 1.5 / 1M * 500K = 0.75; round(0.75) = 1
if result.ActualQuotaAfterGroup != 1 {
t.Errorf("after group = %d, want 1 (round 0.75 up)", result.ActualQuotaAfterGroup)
}
}
func TestComputeTieredQuota_RoundingEdgeDown(t *testing.T) {
exprStr := `tier("default", p * 0.4)` // 3 * 0.4 = 1.2 -> round to 1
exprStr := `tier("default", p * 0.4)` // 3 * 0.4 = 1.2 (expr); 1.2 / 1M * 500K = 0.6; round(0.6) = 1
snap := &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
BillingMode: "tiered_expr",
ExprString: exprStr,
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
QuotaPerUnit: 500_000,
}
result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3})
if err != nil {
t.Fatal(err)
}
// 3 * 0.4 = 1.2, round(1.2) = 1
// 3 * 0.4 = 1.2 (expr); quota = 1.2 / 1M * 500K = 0.6; round(0.6) = 1
if result.ActualQuotaAfterGroup != 1 {
t.Errorf("after group = %d, want 1 (round 1.2 down)", result.ActualQuotaAfterGroup)
t.Errorf("after group = %d, want 1 (round 0.6 up)", result.ActualQuotaAfterGroup)
}
}
@@ -783,6 +795,7 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) {
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
EstimatedTier: "normal",
QuotaPerUnit: 500_000,
}
// Without request: normal tier
@@ -790,8 +803,9 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if r1.ActualQuotaAfterGroup != 2000 {
t.Errorf("normal = %d, want 2000", r1.ActualQuotaAfterGroup)
// normal: p*2 = 2000; quota = 2000 / 1M * 500K = 1000
if r1.ActualQuotaAfterGroup != 1000 {
t.Errorf("normal = %d, want 1000", r1.ActualQuotaAfterGroup)
}
// With request: fast tier
@@ -801,8 +815,9 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if r2.ActualQuotaAfterGroup != 4000 {
t.Errorf("fast = %d, want 4000", r2.ActualQuotaAfterGroup)
// fast: p*4 = 4000; quota = 4000 / 1M * 500K = 2000
if r2.ActualQuotaAfterGroup != 2000 {
t.Errorf("fast = %d, want 2000", r2.ActualQuotaAfterGroup)
}
if !r2.CrossedTier {
t.Error("expected CrossedTier = true when probe changes tier")
@@ -817,9 +832,10 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) {
ExprHash: billingexpr.ExprHashString(exprStr),
GroupRatio: 1.0,
EstimatedTier: "small",
QuotaPerUnit: 500_000,
}
// At boundary
// At boundary: small, p*1 = 100000; quota = 100000 / 1M * 500K = 50000
r1, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 100000})
if err != nil {
t.Fatal(err)
@@ -827,11 +843,11 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) {
if r1.MatchedTier != "small" {
t.Errorf("at boundary: tier = %s, want small", r1.MatchedTier)
}
if r1.ActualQuotaAfterGroup != 100000 {
t.Errorf("at boundary: quota = %d, want 100000", r1.ActualQuotaAfterGroup)
if r1.ActualQuotaAfterGroup != 50000 {
t.Errorf("at boundary: quota = %d, want 50000", r1.ActualQuotaAfterGroup)
}
// Past boundary
// Past boundary: large, p*2 = 200002; quota = 200002 / 1M * 500K = 100001
r2, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 100001})
if err != nil {
t.Fatal(err)
@@ -839,8 +855,8 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) {
if r2.MatchedTier != "large" {
t.Errorf("past boundary: tier = %s, want large", r2.MatchedTier)
}
if r2.ActualQuotaAfterGroup != 200002 {
t.Errorf("past boundary: quota = %d, want 200002", r2.ActualQuotaAfterGroup)
if r2.ActualQuotaAfterGroup != 100001 {
t.Errorf("past boundary: quota = %d, want 100001", r2.ActualQuotaAfterGroup)
}
if !r2.CrossedTier {
t.Error("expected CrossedTier = true")
+3 -2
View File
@@ -12,11 +12,12 @@ func ComputeTieredQuotaWithRequest(snap *BillingSnapshot, params TokenParams, re
return TieredResult{}, err
}
afterGroup := QuotaRound(cost * snap.GroupRatio)
quotaBeforeGroup := cost / 1_000_000 * snap.QuotaPerUnit
afterGroup := QuotaRound(quotaBeforeGroup * snap.GroupRatio)
crossed := trace.MatchedTier != snap.EstimatedTier
return TieredResult{
ActualQuotaBeforeGroup: cost,
ActualQuotaBeforeGroup: quotaBeforeGroup,
ActualQuotaAfterGroup: afterGroup,
MatchedTier: trace.MatchedTier,
CrossedTier: crossed,
+1
View File
@@ -45,6 +45,7 @@ type BillingSnapshot struct {
EstimatedQuotaBeforeGroup float64 `json:"estimated_quota_before_group"`
EstimatedQuotaAfterGroup int `json:"estimated_quota_after_group"`
EstimatedTier string `json:"estimated_tier"`
QuotaPerUnit float64 `json:"quota_per_unit"`
}
// TieredResult holds everything needed after running tiered settlement.
+8 -5
View File
@@ -225,7 +225,7 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
return types.PriceData{}, err
}
rawQuota, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
P: float64(promptTokens),
C: float64(estimatedCompletionTokens),
}, requestInput)
@@ -233,11 +233,13 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err)
}
preConsumedQuota := billingexpr.QuotaRound(rawQuota * groupRatioInfo.GroupRatio)
// Expression coefficients are $/1M tokens prices; convert to quota the same way per-call billing does.
quotaBeforeGroup := rawCost / 1_000_000 * common.QuotaPerUnit
preConsumedQuota := billingexpr.QuotaRound(quotaBeforeGroup * groupRatioInfo.GroupRatio)
freeModel := false
if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume {
if groupRatioInfo.GroupRatio == 0 || rawQuota == 0 {
if groupRatioInfo.GroupRatio == 0 || quotaBeforeGroup == 0 {
preConsumedQuota = 0
freeModel = true
}
@@ -252,9 +254,10 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
GroupRatio: groupRatioInfo.GroupRatio,
EstimatedPromptTokens: promptTokens,
EstimatedCompletionTokens: estimatedCompletionTokens,
EstimatedQuotaBeforeGroup: rawQuota,
EstimatedQuotaBeforeGroup: quotaBeforeGroup,
EstimatedQuotaAfterGroup: preConsumedQuota,
EstimatedTier: trace.MatchedTier,
QuotaPerUnit: common.QuotaPerUnit,
}
info.TieredBillingSnapshot = snapshot
info.BillingRequestInput = &requestInput
@@ -266,7 +269,7 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
}
if common.DebugEnabled {
println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d rawQuota=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, rawQuota, groupRatioInfo.GroupRatio, trace.MatchedTier))
println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d quotaBeforeGroup=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, quotaBeforeGroup, groupRatioInfo.GroupRatio, trace.MatchedTier))
}
info.PriceData = priceData
+46 -40
View File
@@ -19,6 +19,8 @@ const cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h *
// Expression with request probes
const probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)`
const testQuotaPerUnit = 500_000.0
func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot {
return &billingexpr.BillingSnapshot{
BillingMode: "tiered_expr",
@@ -27,14 +29,16 @@ func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int)
GroupRatio: groupRatio,
EstimatedPromptTokens: estPrompt,
EstimatedCompletionTokens: estCompletion,
QuotaPerUnit: testQuotaPerUnit,
}
}
func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo {
snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion)
cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)})
snap.EstimatedQuotaBeforeGroup = cost
snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(cost * groupRatio)
quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit
snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup
snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio)
snap.EstimatedTier = trace.MatchedTier
return &relaycommon.RelayInfo{
TieredBillingSnapshot: snap,
@@ -56,7 +60,8 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
GroupRatio: 1.0,
EstimatedPromptTokens: 100,
EstimatedCompletionTokens: 0,
EstimatedQuotaAfterGroup: 100,
EstimatedQuotaAfterGroup: 50,
QuotaPerUnit: testQuotaPerUnit,
},
BillingRequestInput: &billingexpr.RequestInput{
Body: []byte(`{"service_tier":"fast"}`),
@@ -67,8 +72,9 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle to apply")
}
if quota != 200 {
t.Fatalf("quota = %d, want 200", quota)
// fast: p*2 = 200; quota = 200 / 1M * 500K = 100
if quota != 100 {
t.Fatalf("quota = %d, want 100", quota)
}
if result == nil || result.MatchedTier != "fast" {
t.Fatalf("matched tier = %v, want fast", result)
@@ -111,9 +117,9 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// p*2 + c*10 = 2000 + 5000 = 7000
if quota != 7000 {
t.Fatalf("quota = %d, want 7000", quota)
// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
if quota != 3500 {
t.Fatalf("quota = %d, want 3500", quota)
}
if quota != info.FinalPreConsumedQuota {
t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota)
@@ -122,7 +128,7 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
preConsumed := info.FinalPreConsumedQuota // 7000
preConsumed := info.FinalPreConsumedQuota // 3500
// Actual usage is higher than estimated
params := billingexpr.TokenParams{P: 2000, C: 1000}
@@ -130,9 +136,9 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// p*2 + c*10 = 4000 + 10000 = 14000
if quota != 14000 {
t.Fatalf("quota = %d, want 14000", quota)
// p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000
if quota != 7000 {
t.Fatalf("quota = %d, want 7000", quota)
}
if quota <= preConsumed {
t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed)
@@ -141,7 +147,7 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
preConsumed := info.FinalPreConsumedQuota // 7000
preConsumed := info.FinalPreConsumedQuota // 3500
// Actual usage is lower than estimated
params := billingexpr.TokenParams{P: 100, C: 50}
@@ -149,9 +155,9 @@ func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// p*2 + c*10 = 200 + 500 = 700
if quota != 700 {
t.Fatalf("quota = %d, want 700", quota)
// p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350
if quota != 350 {
t.Fatalf("quota = %d, want 350", quota)
}
if quota >= preConsumed {
t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed)
@@ -170,9 +176,9 @@ func TestTryTieredSettle_ExactBoundary(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// standard: p*1.5 + c*7.5 = 300000 + 7500 = 307500
if quota != 307500 {
t.Fatalf("quota = %d, want 307500", quota)
// standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750
if quota != 153750 {
t.Fatalf("quota = %d, want 153750", quota)
}
if result.MatchedTier != "standard" {
t.Fatalf("tier = %s, want standard", result.MatchedTier)
@@ -187,9 +193,9 @@ func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// long_context: p*3 + c*11.25 = 600003 + 11250 = 611253
if quota != 611253 {
t.Fatalf("quota = %d, want 611253", quota)
// long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627
if quota != 305627 {
t.Fatalf("quota = %d, want 305627", quota)
}
if result.MatchedTier != "long_context" {
t.Fatalf("tier = %s, want long_context", result.MatchedTier)
@@ -221,9 +227,9 @@ func TestTryTieredSettle_HugeTokens(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// p*2 + c*10 = 20000000 + 50000000 = 70000000
if quota != 70000000 {
t.Fatalf("quota = %d, want 70000000", quota)
// p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000
if quota != 35000000 {
t.Fatalf("quota = %d, want 35000000", quota)
}
}
@@ -235,23 +241,23 @@ func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) {
if !ok1 {
t.Fatal("expected tiered settle")
}
// p*2 + c*10 + cr*0.2 + cc*2.5 + cc1h*4 = 2000 + 5000 + 0 + 0 + 0 = 7000
// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
// With cache tokens
ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000})
if !ok2 {
t.Fatal("expected tiered settle")
}
// 2000 + 5000 + 10000*0.2 + 5000*2.5 + 2000*4 = 2000 + 5000 + 2000 + 12500 + 8000 = 29500
// 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750
if quota2 <= quota1 {
t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2)
}
if quota1 != 7000 {
t.Fatalf("no-cache quota = %d, want 7000", quota1)
if quota1 != 3500 {
t.Fatalf("no-cache quota = %d, want 3500", quota1)
}
if quota2 != 29500 {
t.Fatalf("cache quota = %d, want 29500", quota2)
if quota2 != 14750 {
t.Fatalf("cache quota = %d, want 14750", quota2)
}
}
@@ -269,9 +275,9 @@ func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// fast: p*4 + c*20 = 4000 + 10000 = 14000
if quota != 14000 {
t.Fatalf("quota = %d, want 14000", quota)
// fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000
if quota != 7000 {
t.Fatalf("quota = %d, want 7000", quota)
}
if result.MatchedTier != "fast" {
t.Fatalf("tier = %s, want fast", result.MatchedTier)
@@ -286,9 +292,9 @@ func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// normal: p*2 + c*10 = 2000 + 5000 = 7000
if quota != 7000 {
t.Fatalf("quota = %d, want 7000", quota)
// normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
if quota != 3500 {
t.Fatalf("quota = %d, want 3500", quota)
}
if result.MatchedTier != "normal" {
t.Fatalf("tier = %s, want normal", result.MatchedTier)
@@ -306,9 +312,9 @@ func TestTryTieredSettle_GroupRatioScaling(t *testing.T) {
if !ok {
t.Fatal("expected tiered settle")
}
// cost = 7000, after group = round(7000 * 1.5) = 10500
if quota != 10500 {
t.Fatalf("quota = %d, want 10500", quota)
// exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250
if quota != 5250 {
t.Fatalf("quota = %d, want 5250", quota)
}
}
@@ -37,7 +37,7 @@ const { Text } = Typography;
const PRICE_SUFFIX = '$/1M tokens';
function unitCostToPrice(uc) {
return (Number(uc) || 0) * 2;
return Number(uc) || 0;
}
function formatPrice(uc) {
@@ -269,7 +269,7 @@ const PricingCardView = ({
</h3>
<div className='flex flex-col gap-1 text-xs mt-1'>
{priceData.isDynamicPricing ? (
formatDynamicPriceSummary(priceData.billingExpr, t)
formatDynamicPriceSummary(priceData.billingExpr, t, priceData.usedGroupRatio)
) : (
formatPriceInfo(priceData, t, siteDisplayType)
)}
+5 -5
View File
@@ -2222,11 +2222,11 @@ function parseTiersFromExpr(exprStr) {
while ((m = tierRe.exec(exprStr)) !== null) {
tiers.push({
label: m[1],
inputPrice: Number(m[2]) * 2,
outputPrice: Number(m[3]) * 2,
cacheReadPrice: m[4] ? Number(m[4]) * 2 : 0,
cacheCreatePrice: m[5] ? Number(m[5]) * 2 : 0,
cacheCreate1hPrice: m[6] ? Number(m[6]) * 2 : 0,
inputPrice: Number(m[2]),
outputPrice: Number(m[3]),
cacheReadPrice: m[4] ? Number(m[4]) : 0,
cacheCreatePrice: m[5] ? Number(m[5]) : 0,
cacheCreate1hPrice: m[6] ? Number(m[6]) : 0,
});
}
return tiers;
+6 -5
View File
@@ -897,9 +897,10 @@ export const getModelPriceItems = (
};
// 格式化动态计费摘要(用于卡片视图,与 formatPriceInfo 风格统一)
export const formatDynamicPriceSummary = (billingExpr, t) => {
export const formatDynamicPriceSummary = (billingExpr, t, groupRatio = 1) => {
if (!billingExpr) return <span style={{ color: 'var(--semi-color-text-1)' }}>{t('动态计费')}</span>;
const gr = groupRatio || 1;
const tierMatches = billingExpr.match(/tier\(/g) || [];
const tierCount = tierMatches.length;
@@ -923,19 +924,19 @@ export const formatDynamicPriceSummary = (billingExpr, t) => {
{firstTierMatch && (
<>
<span style={lineStyle}>
{t('输入价格')} ${(Number(firstTierMatch[1]) * 2).toFixed(4)}{unitSuffix}
{t('输入价格')} ${(Number(firstTierMatch[1]) * gr).toFixed(4)}{unitSuffix}
</span>
<span style={lineStyle}>
{t('输出价格')} ${(Number(firstTierMatch[2]) * 2).toFixed(4)}{unitSuffix}
{t('输出价格')} ${(Number(firstTierMatch[2]) * gr).toFixed(4)}{unitSuffix}
</span>
{firstTierMatch[3] && (
<span style={lineStyle}>
{t('缓存读取价格')} ${(Number(firstTierMatch[3]) * 2).toFixed(4)}{unitSuffix}
{t('缓存读取价格')} ${(Number(firstTierMatch[3]) * gr).toFixed(4)}{unitSuffix}
</span>
)}
{firstTierMatch[4] && (
<span style={lineStyle}>
{t('缓存创建价格')} ${(Number(firstTierMatch[4]) * 2).toFixed(4)}{unitSuffix}
{t('缓存创建价格')} ${(Number(firstTierMatch[4]) * gr).toFixed(4)}{unitSuffix}
</span>
)}
</>
@@ -60,10 +60,10 @@ const { Text } = Typography;
const PRICE_SUFFIX = '$/1M tokens';
function unitCostToPrice(uc) {
return (Number(uc) || 0) * 2;
return Number(uc) || 0;
}
function priceToUnitCost(price) {
return (Number(price) || 0) / 2;
return Number(price) || 0;
}
const OPS = ['<', '<=', '>', '>='];
@@ -762,23 +762,23 @@ const PRESET_GROUPS = [
{
group: '固定价格',
presets: [
{ key: 'flat', label: 'Flat', expr: 'tier("base", p * 1 + c * 2)' },
{ key: 'claude-opus', label: 'Claude Opus 4.6', expr: 'tier("base", p * 2.5 + c * 12.5 + cr * 0.25 + cc * 3.125 + cc1h * 5)' },
{ key: 'gpt-5.4', label: 'GPT-5.4', expr: 'tier("base", p * 1.25 + c * 5 + cr * 0.125)' },
{ key: 'flat', label: 'Flat', expr: 'tier("base", p * 2 + c * 4)' },
{ key: 'claude-opus', label: 'Claude Opus 4.6', expr: 'tier("base", p * 5 + c * 25 + cr * 0.5 + cc * 6.25 + cc1h * 10)' },
{ key: 'gpt-5.4', label: 'GPT-5.4', expr: 'tier("base", p * 2.5 + c * 10 + cr * 0.25)' },
],
},
{
group: '阶梯计费',
presets: [
{ key: 'claude-sonnet', label: 'Claude Sonnet 4.5', expr: 'p <= 200000 ? tier("standard", p * 1.5 + c * 7.5 + cr * 0.15 + cc * 1.875 + cc1h * 3) : tier("long_context", p * 3 + c * 11.25 + cr * 0.3 + cc * 3.75 + cc1h * 6)' },
{ key: 'qwen3-max', label: 'Qwen3-Max', expr: 'p <= 32000 ? tier("short", p * 0.6 + c * 3 + cr * 0.12 + cc * 0.75) : p <= 128000 ? tier("mid", p * 1.2 + c * 6 + cr * 0.24 + cc * 1.5) : tier("long", p * 1.5 + c * 7.5 + cr * 0.3 + cc * 1.875)' },
{ key: 'glm-4.5-air', label: 'GLM-4.5-Air', expr: 'p < 32000 && c < 200 ? tier("short_output", p * 0.4 + c * 1 + cr * 0.08) : p < 32000 && c >= 200 ? tier("long_output", p * 0.4 + c * 3 + cr * 0.08) : tier("mid_context", p * 0.6 + c * 4 + cr * 0.12)' },
{ key: 'claude-sonnet', label: 'Claude Sonnet 4.5', expr: 'p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)' },
{ key: 'qwen3-max', label: 'Qwen3-Max', expr: 'p <= 32000 ? tier("short", p * 1.2 + c * 6 + cr * 0.24 + cc * 1.5) : p <= 128000 ? tier("mid", p * 2.4 + c * 12 + cr * 0.48 + cc * 3) : tier("long", p * 3 + c * 15 + cr * 0.6 + cc * 3.75)' },
{ key: 'glm-4.5-air', label: 'GLM-4.5-Air', expr: 'p < 32000 && c < 200 ? tier("short_output", p * 0.8 + c * 2 + cr * 0.16) : p < 32000 && c >= 200 ? tier("long_output", p * 0.8 + c * 6 + cr * 0.16) : tier("mid_context", p * 1.2 + c * 8 + cr * 0.24)' },
],
},
{
group: '多模态',
presets: [
{ key: 'qwen3-omni-flash', label: 'Qwen3-Omni-Flash', expr: 'tier("base", p * 0.215 + c * 1.53 + img * 0.39 + ai * 1.905 + ao * 7.555)' },
{ key: 'qwen3-omni-flash', label: 'Qwen3-Omni-Flash', expr: 'tier("base", p * 0.43 + c * 3.06 + img * 0.78 + ai * 3.81 + ao * 15.11)' },
],
},
{
@@ -786,12 +786,12 @@ const PRESET_GROUPS = [
presets: [
{
key: 'claude-opus-fast', label: 'Claude Opus 4.6 Fast',
expr: 'tier("base", p * 2.5 + c * 12.5 + cr * 0.25 + cc * 3.125 + cc1h * 5)',
expr: 'tier("base", p * 5 + c * 25 + cr * 0.5 + cc * 6.25 + cc1h * 10)',
requestRules: [{ conditions: [{ source: SOURCE_HEADER, path: 'anthropic-beta', mode: MATCH_CONTAINS, value: 'fast-mode-2026-02-01' }], multiplier: '6' }],
},
{
key: 'gpt-5.4-fast', label: 'GPT-5.4 Fast',
expr: 'tier("base", p * 1.25 + c * 5 + cr * 0.125)',
expr: 'tier("base", p * 2.5 + c * 10 + cr * 0.25)',
requestRules: [{ conditions: [{ source: SOURCE_PARAM, path: 'service_tier', mode: MATCH_EQ, value: 'fast' }], multiplier: '2' }],
},
],
@@ -801,12 +801,12 @@ const PRESET_GROUPS = [
presets: [
{
key: 'night-discount', label: '夜间半价',
expr: 'tier("base", p * 1.5 + c * 7.5)',
expr: 'tier("base", p * 3 + c * 15)',
requestRules: [{ conditions: [{ source: SOURCE_TIME, timeFunc: 'hour', timezone: 'Asia/Shanghai', mode: MATCH_RANGE, rangeStart: '21', rangeEnd: '6' }], multiplier: '0.5' }],
},
{
key: 'weekend-discount', label: '周末8折',
expr: 'tier("base", p * 1.5 + c * 7.5)',
expr: 'tier("base", p * 3 + c * 15)',
requestRules: [
{ conditions: [{ source: SOURCE_TIME, timeFunc: 'weekday', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '0' }], multiplier: '0.8' },
{ conditions: [{ source: SOURCE_TIME, timeFunc: 'weekday', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '6' }], multiplier: '0.8' },
@@ -814,7 +814,7 @@ const PRESET_GROUPS = [
},
{
key: 'new-year-promo', label: '新年促销',
expr: 'tier("base", p * 1.5 + c * 7.5)',
expr: 'tier("base", p * 3 + c * 15)',
requestRules: [{ conditions: [
{ source: SOURCE_TIME, timeFunc: 'month', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '1' },
{ source: SOURCE_TIME, timeFunc: 'day', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '1' },