refactor: update billing calculations to use quota per unit

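- Adjusted billing calculations in tests and core logic to incorporate a new QuotaPerUnit field on BillingSnapshot.
- Modified estimated and settled quota calculations in the tiered billing logic: the expression output is now treated as a $/1M-token price and converted to quota as cost / 1,000,000 * QuotaPerUnit before the group ratio is applied.
- Updated related tests to ensure accuracy with the new quota calculations (the tests use QuotaPerUnit = 500,000).
- Enhanced dynamic pricing components to align with updated billing expressions: the UI no longer multiplies expression coefficients by 2 when displaying prices, the preset expressions carry doubled coefficients, and the card-view price summary now applies the group ratio.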
2026-03-16 20:11:55 +08:00
parent f0589cc478
commit f6c0852da9
10 changed files with 160 additions and 132 deletions
@@ -313,9 +313,10 @@ func TestComputeTieredQuota_Basic(t *testing.T) {
 		GroupRatio:                1.0,
 		EstimatedPromptTokens:     100000,
 		EstimatedCompletionTokens: 5000,
-		EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5,
-		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound(100000*1.5 + 5000*7.5),
+		EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000,
+		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000),
 		EstimatedTier:             "standard",
+		QuotaPerUnit:              500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 300000, C: 10000})
@@ -323,7 +324,7 @@ func TestComputeTieredQuota_Basic(t *testing.T) {
 		t.Fatal(err)
 	}

-	wantBefore := 300000*3.0 + 10000*11.25
+	wantBefore := (300000*3.0 + 10000*11.25) / 1_000_000 * 500_000
 	if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 {
 		t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore)
 	}
@@ -343,9 +344,10 @@ func TestComputeTieredQuota_SameTier(t *testing.T) {
 		GroupRatio:                1.5,
 		EstimatedPromptTokens:     50000,
 		EstimatedCompletionTokens: 1000,
-		EstimatedQuotaBeforeGroup: 50000*1.5 + 1000*7.5,
-		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound((50000*1.5 + 1000*7.5) * 1.5),
+		EstimatedQuotaBeforeGroup: (50000*1.5 + 1000*7.5) / 1_000_000 * 500_000,
+		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound((50000*1.5 + 1000*7.5) / 1_000_000 * 500_000 * 1.5),
 		EstimatedTier:             "standard",
+		QuotaPerUnit:              500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 80000, C: 2000})
@@ -353,7 +355,7 @@ func TestComputeTieredQuota_SameTier(t *testing.T) {
 		t.Fatal(err)
 	}

-	wantBefore := 80000*1.5 + 2000*7.5
+	wantBefore := (80000*1.5 + 2000*7.5) / 1_000_000 * 500_000
 	wantAfter := billingexpr.QuotaRound(wantBefore * 1.5)
 	if result.ActualQuotaAfterGroup != wantAfter {
 		t.Errorf("after group: got %d, want %d", result.ActualQuotaAfterGroup, wantAfter)
@@ -534,9 +536,10 @@ func TestComputeTieredQuota_WithCache(t *testing.T) {
 		GroupRatio:                1.0,
 		EstimatedPromptTokens:     100000,
 		EstimatedCompletionTokens: 5000,
-		EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5,
-		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound(100000*1.5 + 5000*7.5),
+		EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000,
+		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000),
 		EstimatedTier:             "standard",
+		QuotaPerUnit:              500_000,
 	}

 	params := billingexpr.TokenParams{P: 100000, C: 5000, CR: 50000, CC: 10000}
@@ -545,7 +548,7 @@ func TestComputeTieredQuota_WithCache(t *testing.T) {
 		t.Fatal(err)
 	}

-	wantBefore := 100000*1.5 + 5000*7.5 + 50000*0.15 + 10000*1.875
+	wantBefore := (100000*1.5 + 5000*7.5 + 50000*0.15 + 10000*1.875) / 1_000_000 * 500_000
 	if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 {
 		t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore)
 	}
@@ -565,9 +568,10 @@ func TestComputeTieredQuota_WithCacheCrossTier(t *testing.T) {
 		GroupRatio:                2.0,
 		EstimatedPromptTokens:     100000,
 		EstimatedCompletionTokens: 5000,
-		EstimatedQuotaBeforeGroup: 100000*1.5 + 5000*7.5,
-		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound((100000*1.5 + 5000*7.5) * 2.0),
+		EstimatedQuotaBeforeGroup: (100000*1.5 + 5000*7.5) / 1_000_000 * 500_000,
+		EstimatedQuotaAfterGroup:  billingexpr.QuotaRound((100000*1.5 + 5000*7.5) / 1_000_000 * 500_000 * 2.0),
 		EstimatedTier:             "standard",
+		QuotaPerUnit:              500_000,
 	}

 	params := billingexpr.TokenParams{P: 300000, C: 10000, CR: 50000, CC: 10000}
@@ -576,7 +580,7 @@ func TestComputeTieredQuota_WithCacheCrossTier(t *testing.T) {
 		t.Fatal(err)
 	}

-	wantBefore := 300000*3.0 + 10000*11.25 + 50000*0.3 + 10000*3.75
+	wantBefore := (300000*3.0 + 10000*11.25 + 50000*0.3 + 10000*3.75) / 1_000_000 * 500_000
 	wantAfter := billingexpr.QuotaRound(wantBefore * 2.0)
 	if math.Abs(result.ActualQuotaBeforeGroup-wantBefore) > 1e-6 {
 		t.Errorf("before group: got %f, want %f", result.ActualQuotaBeforeGroup, wantBefore)
@@ -646,6 +650,7 @@ func TestFuzz_SettlementConsistency(t *testing.T) {

 		estCost, estTrace, _ := billingexpr.RunExpr(claudeWithCacheExpr, estParams)

+		const qpu = 500_000.0
 		snap := &billingexpr.BillingSnapshot{
 			BillingMode:               "tiered_expr",
 			ExprString:                claudeWithCacheExpr,
@@ -653,9 +658,10 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
 			GroupRatio:                groupRatio,
 			EstimatedPromptTokens:     int(estParams.P),
 			EstimatedCompletionTokens: int(estParams.C),
-			EstimatedQuotaBeforeGroup: estCost,
-			EstimatedQuotaAfterGroup:  billingexpr.QuotaRound(estCost * groupRatio),
+			EstimatedQuotaBeforeGroup: estCost / 1_000_000 * qpu,
+			EstimatedQuotaAfterGroup:  billingexpr.QuotaRound(estCost / 1_000_000 * qpu * groupRatio),
 			EstimatedTier:             estTrace.MatchedTier,
+			QuotaPerUnit:              qpu,
 		}

 		result, err := billingexpr.ComputeTieredQuota(snap, actParams)
@@ -664,7 +670,7 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
 		}

 		directCost, _, _ := billingexpr.RunExpr(claudeWithCacheExpr, actParams)
-		directQuota := billingexpr.QuotaRound(directCost * groupRatio)
+		directQuota := billingexpr.QuotaRound(directCost / 1_000_000 * qpu * groupRatio)

 		if result.ActualQuotaAfterGroup != directQuota {
 			t.Errorf("iter %d: settlement %d != direct %d", i, result.ActualQuotaAfterGroup, directQuota)
@@ -679,21 +685,23 @@ func TestFuzz_SettlementConsistency(t *testing.T) {
 func TestComputeTieredQuota_BasicSettlement(t *testing.T) {
 	exprStr := `tier("default", p + c)`
 	snap := &billingexpr.BillingSnapshot{
-		BillingMode: "tiered_expr",
-		ExprString:  exprStr,
-		ExprHash:    billingexpr.ExprHashString(exprStr),
-		GroupRatio:  1.0,
+		BillingMode:  "tiered_expr",
+		ExprString:   exprStr,
+		ExprHash:     billingexpr.ExprHashString(exprStr),
+		GroupRatio:   1.0,
+		QuotaPerUnit: 500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3000, C: 2000})
 	if err != nil {
 		t.Fatal(err)
 	}
-	if math.Abs(result.ActualQuotaBeforeGroup-5000) > 1e-6 {
-		t.Errorf("before group = %f, want 5000", result.ActualQuotaBeforeGroup)
+	// exprOutput = 5000; quota = 5000 / 1M * 500K = 2500
+	if math.Abs(result.ActualQuotaBeforeGroup-2500) > 1e-6 {
+		t.Errorf("before group = %f, want 2500", result.ActualQuotaBeforeGroup)
 	}
-	if result.ActualQuotaAfterGroup != 5000 {
-		t.Errorf("after group = %d, want 5000", result.ActualQuotaAfterGroup)
+	if result.ActualQuotaAfterGroup != 2500 {
+		t.Errorf("after group = %d, want 2500", result.ActualQuotaAfterGroup)
 	}
 	if result.MatchedTier != "default" {
 		t.Errorf("tier = %q, want default", result.MatchedTier)
@@ -703,29 +711,31 @@ func TestComputeTieredQuota_BasicSettlement(t *testing.T) {
 func TestComputeTieredQuota_WithGroupRatio(t *testing.T) {
 	exprStr := `tier("default", p + c)`
 	snap := &billingexpr.BillingSnapshot{
-		BillingMode: "tiered_expr",
-		ExprString:  exprStr,
-		ExprHash:    billingexpr.ExprHashString(exprStr),
-		GroupRatio:  2.0,
+		BillingMode:  "tiered_expr",
+		ExprString:   exprStr,
+		ExprHash:     billingexpr.ExprHashString(exprStr),
+		GroupRatio:   2.0,
+		QuotaPerUnit: 500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 1000, C: 500})
 	if err != nil {
 		t.Fatal(err)
 	}
-	// cost = 1500, after group = round(1500 * 2.0) = 3000
-	if result.ActualQuotaAfterGroup != 3000 {
-		t.Errorf("after group = %d, want 3000", result.ActualQuotaAfterGroup)
+	// exprOutput = 1500; quotaBeforeGroup = 750; afterGroup = round(750 * 2.0) = 1500
+	if result.ActualQuotaAfterGroup != 1500 {
+		t.Errorf("after group = %d, want 1500", result.ActualQuotaAfterGroup)
 	}
 }

 func TestComputeTieredQuota_ZeroTokens(t *testing.T) {
 	exprStr := `tier("default", p * 2 + c * 10)`
 	snap := &billingexpr.BillingSnapshot{
-		BillingMode: "tiered_expr",
-		ExprString:  exprStr,
-		ExprHash:    billingexpr.ExprHashString(exprStr),
-		GroupRatio:  1.0,
+		BillingMode:  "tiered_expr",
+		ExprString:   exprStr,
+		ExprHash:     billingexpr.ExprHashString(exprStr),
+		GroupRatio:   1.0,
+		QuotaPerUnit: 500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{})
@@ -738,40 +748,42 @@ func TestComputeTieredQuota_ZeroTokens(t *testing.T) {
 }

 func TestComputeTieredQuota_RoundingEdge(t *testing.T) {
-	exprStr := `tier("default", p * 0.5)` // 3 * 0.5 = 1.5 -> round to 2
+	exprStr := `tier("default", p * 0.5)` // 3 * 0.5 = 1.5 (expr); 1.5 / 1M * 500K = 0.75; round(0.75) = 1
 	snap := &billingexpr.BillingSnapshot{
-		BillingMode: "tiered_expr",
-		ExprString:  exprStr,
-		ExprHash:    billingexpr.ExprHashString(exprStr),
-		GroupRatio:  1.0,
+		BillingMode:  "tiered_expr",
+		ExprString:   exprStr,
+		ExprHash:     billingexpr.ExprHashString(exprStr),
+		GroupRatio:   1.0,
+		QuotaPerUnit: 500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3})
 	if err != nil {
 		t.Fatal(err)
 	}
-	// 3 * 0.5 = 1.5, round(1.5) = 2
-	if result.ActualQuotaAfterGroup != 2 {
-		t.Errorf("after group = %d, want 2 (round 1.5 up)", result.ActualQuotaAfterGroup)
+	// 3 * 0.5 = 1.5 (expr); quota = 1.5 / 1M * 500K = 0.75; round(0.75) = 1
+	if result.ActualQuotaAfterGroup != 1 {
+		t.Errorf("after group = %d, want 1 (round 0.75 up)", result.ActualQuotaAfterGroup)
 	}
 }

 func TestComputeTieredQuota_RoundingEdgeDown(t *testing.T) {
-	exprStr := `tier("default", p * 0.4)` // 3 * 0.4 = 1.2 -> round to 1
+	exprStr := `tier("default", p * 0.4)` // 3 * 0.4 = 1.2 (expr); 1.2 / 1M * 500K = 0.6; round(0.6) = 1
 	snap := &billingexpr.BillingSnapshot{
-		BillingMode: "tiered_expr",
-		ExprString:  exprStr,
-		ExprHash:    billingexpr.ExprHashString(exprStr),
-		GroupRatio:  1.0,
+		BillingMode:  "tiered_expr",
+		ExprString:   exprStr,
+		ExprHash:     billingexpr.ExprHashString(exprStr),
+		GroupRatio:   1.0,
+		QuotaPerUnit: 500_000,
 	}

 	result, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 3})
 	if err != nil {
 		t.Fatal(err)
 	}
-	// 3 * 0.4 = 1.2, round(1.2) = 1
+	// 3 * 0.4 = 1.2 (expr); quota = 1.2 / 1M * 500K = 0.6; round(0.6) = 1
 	if result.ActualQuotaAfterGroup != 1 {
-		t.Errorf("after group = %d, want 1 (round 1.2 down)", result.ActualQuotaAfterGroup)
+		t.Errorf("after group = %d, want 1 (round 0.6 up)", result.ActualQuotaAfterGroup)
 	}
 }

@@ -783,6 +795,7 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) {
 		ExprHash:      billingexpr.ExprHashString(exprStr),
 		GroupRatio:    1.0,
 		EstimatedTier: "normal",
+		QuotaPerUnit:  500_000,
 	}

 	// Without request: normal tier
@@ -790,8 +803,9 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if r1.ActualQuotaAfterGroup != 2000 {
-		t.Errorf("normal = %d, want 2000", r1.ActualQuotaAfterGroup)
+	// normal: p*2 = 2000; quota = 2000 / 1M * 500K = 1000
+	if r1.ActualQuotaAfterGroup != 1000 {
+		t.Errorf("normal = %d, want 1000", r1.ActualQuotaAfterGroup)
 	}

 	// With request: fast tier
@@ -801,8 +815,9 @@ func TestComputeTieredQuotaWithRequest_ProbeAffectsQuota(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if r2.ActualQuotaAfterGroup != 4000 {
-		t.Errorf("fast = %d, want 4000", r2.ActualQuotaAfterGroup)
+	// fast: p*4 = 4000; quota = 4000 / 1M * 500K = 2000
+	if r2.ActualQuotaAfterGroup != 2000 {
+		t.Errorf("fast = %d, want 2000", r2.ActualQuotaAfterGroup)
 	}
 	if !r2.CrossedTier {
 		t.Error("expected CrossedTier = true when probe changes tier")
@@ -817,9 +832,10 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) {
 		ExprHash:      billingexpr.ExprHashString(exprStr),
 		GroupRatio:    1.0,
 		EstimatedTier: "small",
+		QuotaPerUnit:  500_000,
 	}

-	// At boundary
+	// At boundary: small, p*1 = 100000; quota = 100000 / 1M * 500K = 50000
 	r1, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 100000})
 	if err != nil {
 		t.Fatal(err)
@@ -827,11 +843,11 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) {
 	if r1.MatchedTier != "small" {
 		t.Errorf("at boundary: tier = %s, want small", r1.MatchedTier)
 	}
-	if r1.ActualQuotaAfterGroup != 100000 {
-		t.Errorf("at boundary: quota = %d, want 100000", r1.ActualQuotaAfterGroup)
+	if r1.ActualQuotaAfterGroup != 50000 {
+		t.Errorf("at boundary: quota = %d, want 50000", r1.ActualQuotaAfterGroup)
 	}

-	// Past boundary
+	// Past boundary: large, p*2 = 200002; quota = 200002 / 1M * 500K = 100001
 	r2, err := billingexpr.ComputeTieredQuota(snap, billingexpr.TokenParams{P: 100001})
 	if err != nil {
 		t.Fatal(err)
@@ -839,8 +855,8 @@ func TestComputeTieredQuota_BoundaryTierCrossing(t *testing.T) {
 	if r2.MatchedTier != "large" {
 		t.Errorf("past boundary: tier = %s, want large", r2.MatchedTier)
 	}
-	if r2.ActualQuotaAfterGroup != 200002 {
-		t.Errorf("past boundary: quota = %d, want 200002", r2.ActualQuotaAfterGroup)
+	if r2.ActualQuotaAfterGroup != 100001 {
+		t.Errorf("past boundary: quota = %d, want 100001", r2.ActualQuotaAfterGroup)
 	}
 	if !r2.CrossedTier {
 		t.Error("expected CrossedTier = true")
@@ -12,11 +12,12 @@ func ComputeTieredQuotaWithRequest(snap *BillingSnapshot, params TokenParams, re
 		return TieredResult{}, err
 	}

-	afterGroup := QuotaRound(cost * snap.GroupRatio)
+	quotaBeforeGroup := cost / 1_000_000 * snap.QuotaPerUnit
+	afterGroup := QuotaRound(quotaBeforeGroup * snap.GroupRatio)
 	crossed := trace.MatchedTier != snap.EstimatedTier

 	return TieredResult{
-		ActualQuotaBeforeGroup: cost,
+		ActualQuotaBeforeGroup: quotaBeforeGroup,
 		ActualQuotaAfterGroup:  afterGroup,
 		MatchedTier:            trace.MatchedTier,
 		CrossedTier:            crossed,
@@ -45,6 +45,7 @@ type BillingSnapshot struct {
 	EstimatedQuotaBeforeGroup float64 `json:"estimated_quota_before_group"`
 	EstimatedQuotaAfterGroup  int     `json:"estimated_quota_after_group"`
 	EstimatedTier             string  `json:"estimated_tier"`
+	QuotaPerUnit              float64 `json:"quota_per_unit"`
 }

 // TieredResult holds everything needed after running tiered settlement.
@@ -225,7 +225,7 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
 		return types.PriceData{}, err
 	}

-	rawQuota, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
+	rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
 		P: float64(promptTokens),
 		C: float64(estimatedCompletionTokens),
 	}, requestInput)
@@ -233,11 +233,13 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
 		return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err)
 	}

-	preConsumedQuota := billingexpr.QuotaRound(rawQuota * groupRatioInfo.GroupRatio)
+	// Expression coefficients are $/1M tokens prices; convert to quota the same way per-call billing does.
+	quotaBeforeGroup := rawCost / 1_000_000 * common.QuotaPerUnit
+	preConsumedQuota := billingexpr.QuotaRound(quotaBeforeGroup * groupRatioInfo.GroupRatio)

 	freeModel := false
 	if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume {
-		if groupRatioInfo.GroupRatio == 0 || rawQuota == 0 {
+		if groupRatioInfo.GroupRatio == 0 || quotaBeforeGroup == 0 {
 			preConsumedQuota = 0
 			freeModel = true
 		}
@@ -252,9 +254,10 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
 		GroupRatio:                groupRatioInfo.GroupRatio,
 		EstimatedPromptTokens:     promptTokens,
 		EstimatedCompletionTokens: estimatedCompletionTokens,
-		EstimatedQuotaBeforeGroup: rawQuota,
+		EstimatedQuotaBeforeGroup: quotaBeforeGroup,
 		EstimatedQuotaAfterGroup:  preConsumedQuota,
 		EstimatedTier:             trace.MatchedTier,
+		QuotaPerUnit:              common.QuotaPerUnit,
 	}
 	info.TieredBillingSnapshot = snapshot
 	info.BillingRequestInput = &requestInput
@@ -266,7 +269,7 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
 	}

 	if common.DebugEnabled {
-		println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d rawQuota=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, rawQuota, groupRatioInfo.GroupRatio, trace.MatchedTier))
+		println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d quotaBeforeGroup=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, quotaBeforeGroup, groupRatioInfo.GroupRatio, trace.MatchedTier))
 	}

 	info.PriceData = priceData
@@ -19,6 +19,8 @@ const cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h *
 // Expression with request probes
 const probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)`

+const testQuotaPerUnit = 500_000.0
+
 func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot {
 	return &billingexpr.BillingSnapshot{
 		BillingMode:               "tiered_expr",
@@ -27,14 +29,16 @@ func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int)
 		GroupRatio:                groupRatio,
 		EstimatedPromptTokens:     estPrompt,
 		EstimatedCompletionTokens: estCompletion,
+		QuotaPerUnit:              testQuotaPerUnit,
 	}
 }

 func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo {
 	snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion)
 	cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)})
-	snap.EstimatedQuotaBeforeGroup = cost
-	snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(cost * groupRatio)
+	quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit
+	snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup
+	snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio)
 	snap.EstimatedTier = trace.MatchedTier
 	return &relaycommon.RelayInfo{
 		TieredBillingSnapshot: snap,
@@ -56,7 +60,8 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
 			GroupRatio:                1.0,
 			EstimatedPromptTokens:     100,
 			EstimatedCompletionTokens: 0,
-			EstimatedQuotaAfterGroup:  100,
+			EstimatedQuotaAfterGroup:  50,
+			QuotaPerUnit:              testQuotaPerUnit,
 		},
 		BillingRequestInput: &billingexpr.RequestInput{
 			Body: []byte(`{"service_tier":"fast"}`),
@@ -67,8 +72,9 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle to apply")
 	}
-	if quota != 200 {
-		t.Fatalf("quota = %d, want 200", quota)
+	// fast: p*2 = 200; quota = 200 / 1M * 500K = 100
+	if quota != 100 {
+		t.Fatalf("quota = %d, want 100", quota)
 	}
 	if result == nil || result.MatchedTier != "fast" {
 		t.Fatalf("matched tier = %v, want fast", result)
@@ -111,9 +117,9 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 2000 + 5000 = 7000
-	if quota != 7000 {
-		t.Fatalf("quota = %d, want 7000", quota)
+	// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
+	if quota != 3500 {
+		t.Fatalf("quota = %d, want 3500", quota)
 	}
 	if quota != info.FinalPreConsumedQuota {
 		t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota)
@@ -122,7 +128,7 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {

 func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
 	info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
-	preConsumed := info.FinalPreConsumedQuota // 7000
+	preConsumed := info.FinalPreConsumedQuota // 3500

 	// Actual usage is higher than estimated
 	params := billingexpr.TokenParams{P: 2000, C: 1000}
@@ -130,9 +136,9 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 4000 + 10000 = 14000
-	if quota != 14000 {
-		t.Fatalf("quota = %d, want 14000", quota)
+	// p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000
+	if quota != 7000 {
+		t.Fatalf("quota = %d, want 7000", quota)
 	}
 	if quota <= preConsumed {
 		t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed)
@@ -141,7 +147,7 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {

 func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
 	info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
-	preConsumed := info.FinalPreConsumedQuota // 7000
+	preConsumed := info.FinalPreConsumedQuota // 3500

 	// Actual usage is lower than estimated
 	params := billingexpr.TokenParams{P: 100, C: 50}
@@ -149,9 +155,9 @@ func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 200 + 500 = 700
-	if quota != 700 {
-		t.Fatalf("quota = %d, want 700", quota)
+	// p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350
+	if quota != 350 {
+		t.Fatalf("quota = %d, want 350", quota)
 	}
 	if quota >= preConsumed {
 		t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed)
@@ -170,9 +176,9 @@ func TestTryTieredSettle_ExactBoundary(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// standard: p*1.5 + c*7.5 = 300000 + 7500 = 307500
-	if quota != 307500 {
-		t.Fatalf("quota = %d, want 307500", quota)
+	// standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750
+	if quota != 153750 {
+		t.Fatalf("quota = %d, want 153750", quota)
 	}
 	if result.MatchedTier != "standard" {
 		t.Fatalf("tier = %s, want standard", result.MatchedTier)
@@ -187,9 +193,9 @@ func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// long_context: p*3 + c*11.25 = 600003 + 11250 = 611253
-	if quota != 611253 {
-		t.Fatalf("quota = %d, want 611253", quota)
+	// long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627
+	if quota != 305627 {
+		t.Fatalf("quota = %d, want 305627", quota)
 	}
 	if result.MatchedTier != "long_context" {
 		t.Fatalf("tier = %s, want long_context", result.MatchedTier)
@@ -221,9 +227,9 @@ func TestTryTieredSettle_HugeTokens(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 20000000 + 50000000 = 70000000
-	if quota != 70000000 {
-		t.Fatalf("quota = %d, want 70000000", quota)
+	// p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000
+	if quota != 35000000 {
+		t.Fatalf("quota = %d, want 35000000", quota)
 	}
 }

@@ -235,23 +241,23 @@ func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) {
 	if !ok1 {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 + cr*0.2 + cc*2.5 + cc1h*4 = 2000 + 5000 + 0 + 0 + 0 = 7000
+	// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500

 	// With cache tokens
 	ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000})
 	if !ok2 {
 		t.Fatal("expected tiered settle")
 	}
-	// 2000 + 5000 + 10000*0.2 + 5000*2.5 + 2000*4 = 2000 + 5000 + 2000 + 12500 + 8000 = 29500
+	// 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750

 	if quota2 <= quota1 {
 		t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2)
 	}
-	if quota1 != 7000 {
-		t.Fatalf("no-cache quota = %d, want 7000", quota1)
+	if quota1 != 3500 {
+		t.Fatalf("no-cache quota = %d, want 3500", quota1)
 	}
-	if quota2 != 29500 {
-		t.Fatalf("cache quota = %d, want 29500", quota2)
+	if quota2 != 14750 {
+		t.Fatalf("cache quota = %d, want 14750", quota2)
 	}
 }

@@ -269,9 +275,9 @@ func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// fast: p*4 + c*20 = 4000 + 10000 = 14000
-	if quota != 14000 {
-		t.Fatalf("quota = %d, want 14000", quota)
+	// fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000
+	if quota != 7000 {
+		t.Fatalf("quota = %d, want 7000", quota)
 	}
 	if result.MatchedTier != "fast" {
 		t.Fatalf("tier = %s, want fast", result.MatchedTier)
@@ -286,9 +292,9 @@ func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// normal: p*2 + c*10 = 2000 + 5000 = 7000
-	if quota != 7000 {
-		t.Fatalf("quota = %d, want 7000", quota)
+	// normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
+	if quota != 3500 {
+		t.Fatalf("quota = %d, want 3500", quota)
 	}
 	if result.MatchedTier != "normal" {
 		t.Fatalf("tier = %s, want normal", result.MatchedTier)
@@ -306,9 +312,9 @@ func TestTryTieredSettle_GroupRatioScaling(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// cost = 7000, after group = round(7000 * 1.5) = 10500
-	if quota != 10500 {
-		t.Fatalf("quota = %d, want 10500", quota)
+	// exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250
+	if quota != 5250 {
+		t.Fatalf("quota = %d, want 5250", quota)
 	}
 }

@@ -37,7 +37,7 @@ const { Text } = Typography;
 const PRICE_SUFFIX = '$/1M tokens';

 function unitCostToPrice(uc) {
-  return (Number(uc) || 0) * 2;
+  return Number(uc) || 0;
 }

 function formatPrice(uc) {
@@ -269,7 +269,7 @@ const PricingCardView = ({
                      </h3>
                      <div className='flex flex-col gap-1 text-xs mt-1'>
                        {priceData.isDynamicPricing ? (
-                          formatDynamicPriceSummary(priceData.billingExpr, t)
+                          formatDynamicPriceSummary(priceData.billingExpr, t, priceData.usedGroupRatio)
                        ) : (
                          formatPriceInfo(priceData, t, siteDisplayType)
                        )}
@@ -2222,11 +2222,11 @@ function parseTiersFromExpr(exprStr) {
    while ((m = tierRe.exec(exprStr)) !== null) {
      tiers.push({
        label: m[1],
-        inputPrice: Number(m[2]) * 2,
-        outputPrice: Number(m[3]) * 2,
-        cacheReadPrice: m[4] ? Number(m[4]) * 2 : 0,
-        cacheCreatePrice: m[5] ? Number(m[5]) * 2 : 0,
-        cacheCreate1hPrice: m[6] ? Number(m[6]) * 2 : 0,
+        inputPrice: Number(m[2]),
+        outputPrice: Number(m[3]),
+        cacheReadPrice: m[4] ? Number(m[4]) : 0,
+        cacheCreatePrice: m[5] ? Number(m[5]) : 0,
+        cacheCreate1hPrice: m[6] ? Number(m[6]) : 0,
      });
    }
    return tiers;
@@ -897,9 +897,10 @@ export const getModelPriceItems = (
 };

 // 格式化动态计费摘要（用于卡片视图，与 formatPriceInfo 风格统一）
-export const formatDynamicPriceSummary = (billingExpr, t) => {
+export const formatDynamicPriceSummary = (billingExpr, t, groupRatio = 1) => {
  if (!billingExpr) return <span style={{ color: 'var(--semi-color-text-1)' }}>{t('动态计费')}</span>;

+  const gr = groupRatio || 1;
  const tierMatches = billingExpr.match(/tier\(/g) || [];
  const tierCount = tierMatches.length;

@@ -923,19 +924,19 @@ export const formatDynamicPriceSummary = (billingExpr, t) => {
      {firstTierMatch && (
        <>
          <span style={lineStyle}>
-            {t('输入价格')} ${(Number(firstTierMatch[1]) * 2).toFixed(4)}{unitSuffix}
+            {t('输入价格')} ${(Number(firstTierMatch[1]) * gr).toFixed(4)}{unitSuffix}
          </span>
          <span style={lineStyle}>
-            {t('输出价格')} ${(Number(firstTierMatch[2]) * 2).toFixed(4)}{unitSuffix}
+            {t('输出价格')} ${(Number(firstTierMatch[2]) * gr).toFixed(4)}{unitSuffix}
          </span>
          {firstTierMatch[3] && (
            <span style={lineStyle}>
-              {t('缓存读取价格')} ${(Number(firstTierMatch[3]) * 2).toFixed(4)}{unitSuffix}
+              {t('缓存读取价格')} ${(Number(firstTierMatch[3]) * gr).toFixed(4)}{unitSuffix}
            </span>
          )}
          {firstTierMatch[4] && (
            <span style={lineStyle}>
-              {t('缓存创建价格')} ${(Number(firstTierMatch[4]) * 2).toFixed(4)}{unitSuffix}
+              {t('缓存创建价格')} ${(Number(firstTierMatch[4]) * gr).toFixed(4)}{unitSuffix}
            </span>
          )}
        </>
@@ -60,10 +60,10 @@ const { Text } = Typography;
 const PRICE_SUFFIX = '$/1M tokens';

 function unitCostToPrice(uc) {
-  return (Number(uc) || 0) * 2;
+  return Number(uc) || 0;
 }
 function priceToUnitCost(price) {
-  return (Number(price) || 0) / 2;
+  return Number(price) || 0;
 }

 const OPS = ['<', '<=', '>', '>='];
@@ -762,23 +762,23 @@ const PRESET_GROUPS = [
  {
    group: '固定价格',
    presets: [
-      { key: 'flat', label: 'Flat', expr: 'tier("base", p * 1 + c * 2)' },
-      { key: 'claude-opus', label: 'Claude Opus 4.6', expr: 'tier("base", p * 2.5 + c * 12.5 + cr * 0.25 + cc * 3.125 + cc1h * 5)' },
-      { key: 'gpt-5.4', label: 'GPT-5.4', expr: 'tier("base", p * 1.25 + c * 5 + cr * 0.125)' },
+      { key: 'flat', label: 'Flat', expr: 'tier("base", p * 2 + c * 4)' },
+      { key: 'claude-opus', label: 'Claude Opus 4.6', expr: 'tier("base", p * 5 + c * 25 + cr * 0.5 + cc * 6.25 + cc1h * 10)' },
+      { key: 'gpt-5.4', label: 'GPT-5.4', expr: 'tier("base", p * 2.5 + c * 10 + cr * 0.25)' },
    ],
  },
  {
    group: '阶梯计费',
    presets: [
-      { key: 'claude-sonnet', label: 'Claude Sonnet 4.5', expr: 'p <= 200000 ? tier("standard", p * 1.5 + c * 7.5 + cr * 0.15 + cc * 1.875 + cc1h * 3) : tier("long_context", p * 3 + c * 11.25 + cr * 0.3 + cc * 3.75 + cc1h * 6)' },
-      { key: 'qwen3-max', label: 'Qwen3-Max', expr: 'p <= 32000 ? tier("short", p * 0.6 + c * 3 + cr * 0.12 + cc * 0.75) : p <= 128000 ? tier("mid", p * 1.2 + c * 6 + cr * 0.24 + cc * 1.5) : tier("long", p * 1.5 + c * 7.5 + cr * 0.3 + cc * 1.875)' },
-      { key: 'glm-4.5-air', label: 'GLM-4.5-Air', expr: 'p < 32000 && c < 200 ? tier("short_output", p * 0.4 + c * 1 + cr * 0.08) : p < 32000 && c >= 200 ? tier("long_output", p * 0.4 + c * 3 + cr * 0.08) : tier("mid_context", p * 0.6 + c * 4 + cr * 0.12)' },
+      { key: 'claude-sonnet', label: 'Claude Sonnet 4.5', expr: 'p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)' },
+      { key: 'qwen3-max', label: 'Qwen3-Max', expr: 'p <= 32000 ? tier("short", p * 1.2 + c * 6 + cr * 0.24 + cc * 1.5) : p <= 128000 ? tier("mid", p * 2.4 + c * 12 + cr * 0.48 + cc * 3) : tier("long", p * 3 + c * 15 + cr * 0.6 + cc * 3.75)' },
+      { key: 'glm-4.5-air', label: 'GLM-4.5-Air', expr: 'p < 32000 && c < 200 ? tier("short_output", p * 0.8 + c * 2 + cr * 0.16) : p < 32000 && c >= 200 ? tier("long_output", p * 0.8 + c * 6 + cr * 0.16) : tier("mid_context", p * 1.2 + c * 8 + cr * 0.24)' },
    ],
  },
  {
    group: '多模态',
    presets: [
-      { key: 'qwen3-omni-flash', label: 'Qwen3-Omni-Flash', expr: 'tier("base", p * 0.215 + c * 1.53 + img * 0.39 + ai * 1.905 + ao * 7.555)' },
+      { key: 'qwen3-omni-flash', label: 'Qwen3-Omni-Flash', expr: 'tier("base", p * 0.43 + c * 3.06 + img * 0.78 + ai * 3.81 + ao * 15.11)' },
    ],
  },
  {
@@ -786,12 +786,12 @@ const PRESET_GROUPS = [
    presets: [
      {
        key: 'claude-opus-fast', label: 'Claude Opus 4.6 Fast',
-        expr: 'tier("base", p * 2.5 + c * 12.5 + cr * 0.25 + cc * 3.125 + cc1h * 5)',
+        expr: 'tier("base", p * 5 + c * 25 + cr * 0.5 + cc * 6.25 + cc1h * 10)',
        requestRules: [{ conditions: [{ source: SOURCE_HEADER, path: 'anthropic-beta', mode: MATCH_CONTAINS, value: 'fast-mode-2026-02-01' }], multiplier: '6' }],
      },
      {
        key: 'gpt-5.4-fast', label: 'GPT-5.4 Fast',
-        expr: 'tier("base", p * 1.25 + c * 5 + cr * 0.125)',
+        expr: 'tier("base", p * 2.5 + c * 10 + cr * 0.25)',
        requestRules: [{ conditions: [{ source: SOURCE_PARAM, path: 'service_tier', mode: MATCH_EQ, value: 'fast' }], multiplier: '2' }],
      },
    ],
@@ -801,12 +801,12 @@ const PRESET_GROUPS = [
    presets: [
      {
        key: 'night-discount', label: '夜间半价',
-        expr: 'tier("base", p * 1.5 + c * 7.5)',
+        expr: 'tier("base", p * 3 + c * 15)',
        requestRules: [{ conditions: [{ source: SOURCE_TIME, timeFunc: 'hour', timezone: 'Asia/Shanghai', mode: MATCH_RANGE, rangeStart: '21', rangeEnd: '6' }], multiplier: '0.5' }],
      },
      {
        key: 'weekend-discount', label: '周末8折',
-        expr: 'tier("base", p * 1.5 + c * 7.5)',
+        expr: 'tier("base", p * 3 + c * 15)',
        requestRules: [
          { conditions: [{ source: SOURCE_TIME, timeFunc: 'weekday', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '0' }], multiplier: '0.8' },
          { conditions: [{ source: SOURCE_TIME, timeFunc: 'weekday', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '6' }], multiplier: '0.8' },
@@ -814,7 +814,7 @@ const PRESET_GROUPS = [
      },
      {
        key: 'new-year-promo', label: '新年促销',
-        expr: 'tier("base", p * 1.5 + c * 7.5)',
+        expr: 'tier("base", p * 3 + c * 15)',
        requestRules: [{ conditions: [
          { source: SOURCE_TIME, timeFunc: 'month', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '1' },
          { source: SOURCE_TIME, timeFunc: 'day', timezone: 'Asia/Shanghai', mode: MATCH_EQ, value: '1' },