feat: add billing expression system documentation and enhance tiered billing logic

- Introduced a new rule for the Billing Expression System, emphasizing the importance of reading `pkg/billingexpr/expr.md` for dynamic billing.
- Updated the billing expression logic to support new variables and improved handling of image and audio tokens.
- Enhanced the tiered billing functionality with versioning support for expressions and refined quota calculations.
- Added tests to validate the new billing expression features and ensure correctness in pricing calculations.
2026-03-17 15:29:43 +08:00
parent 5b03b39db2
commit c5405b2a12
27 changed files with 894 additions and 578 deletions
@@ -145,7 +145,7 @@ func TestMathHelpers(t *testing.T) {

 func TestRequestProbeHelpers(t *testing.T) {
 	cost, _, err := billingexpr.RunExprWithRequest(
-		`prompt_tokens * 0.5 + completion_tokens * 1.0 * (param("service_tier") == "fast" ? 2 : 1)`,
+		`p * 0.5 + c * 1.0 * (param("service_tier") == "fast" ? 2 : 1)`,
 		billingexpr.TokenParams{P: 1000, C: 500},
 		billingexpr.RequestInput{
 			Body: []byte(`{"service_tier":"fast"}`),
@@ -976,8 +976,8 @@ func TestAudioTokenVariables(t *testing.T) {
 	}
 }

-func TestImageAudioAliases(t *testing.T) {
-	exprStr := `tier("base", prompt_tokens * 1 + image_tokens * 3 + audio_input_tokens * 5 + audio_output_tokens * 10)`
+func TestImageAudioVariables(t *testing.T) {
+	exprStr := `tier("base", p * 1 + img * 3 + ai * 5 + ao * 10)`
 	cost, _, err := billingexpr.RunExpr(exprStr, billingexpr.TokenParams{P: 100, Img: 50, AI: 20, AO: 10})
 	if err != nil {
 		t.Fatal(err)
@@ -999,3 +999,25 @@ func TestImageAudioZero(t *testing.T) {
 		t.Errorf("cost = %f, want 2000", cost)
 	}
 }
+
+// ---------------------------------------------------------------------------
+// Benchmarks: compile vs cached execution
+// ---------------------------------------------------------------------------
+
+const benchComplexExpr = `p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)` // two-tier expression shared by the benchmarks below; both branches reference p, c, cr, cc, cc1h, img, img_o, ai and ao
+
+func BenchmarkExprCompile(b *testing.B) { // measures the uncached path: the cache is invalidated before every compile
+	for i := 0; i < b.N; i++ {
+		billingexpr.InvalidateCache() // runs inside the timed loop, so its cost is part of the measurement
+		billingexpr.CompileFromCache(benchComplexExpr) // program and error are discarded
+	}
+}
+
+func BenchmarkExprRunCached(b *testing.B) { // measures RunExpr after the expression has already been compiled once
+	billingexpr.CompileFromCache(benchComplexExpr) // warm-up compile, done before the timer is reset; result and error are discarded
+	params := billingexpr.TokenParams{P: 150000, C: 10000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500} // CC1h and ImgO stay at their zero value although the expression references cc1h and img_o
+	b.ResetTimer() // exclude the setup above from the measurement
+	for i := 0; i < b.N; i++ {
+		billingexpr.RunExpr(benchComplexExpr, params) // all return values are discarded
+	}
+}
@@ -3,6 +3,7 @@ package billingexpr
 import (
 	"fmt"
 	"math"
+	"strings"
 	"sync"

 	"github.com/expr-lang/expr"
@@ -12,9 +13,23 @@ import (

 const maxCacheSize = 256

+// DefaultExprVersion is the version assumed for an expression string that carries no version prefix.
+const DefaultExprVersion = 1
+
+// ParseExprVersion splits an expression string into its version and its body.
+// Only the literal prefix "v1:" is recognized: "v1:tier(...)" → version=1, body="tier(...)".
+// Any other input — no prefix, or an unrecognized one such as "v2:" — is returned unchanged as body with DefaultExprVersion.
+func ParseExprVersion(exprStr string) (version int, body string) {
+	if strings.HasPrefix(exprStr, "v1:") {
+		return 1, exprStr[3:] // 3 == len("v1:"); drops the prefix
+	}
+	return DefaultExprVersion, exprStr
+}
+
 type cachedEntry struct {
 	prog     *vm.Program     // compiled program for the expression
 	usedVars map[string]bool // names recorded for the program at compile time (presumably the identifiers collected by extractUsedVars — confirm)
+	version  int             // version returned by ParseExprVersion when the entry was compiled
 }

 var (
@@ -22,27 +37,17 @@ var (
 	cache   = make(map[string]*cachedEntry, 64)
 )

-// compileEnvPrototype is the type-checking prototype used at compile time.
-// It declares the shape of the environment that RunExpr will provide.
-// The tier() function is a no-op placeholder here; the real one with
-// side-channel tracing is injected at runtime.
-var compileEnvPrototype = map[string]interface{}{
-	"p":                      float64(0),
-	"c":                      float64(0),
-	"cr":                     float64(0),
-	"cc":                     float64(0),
-	"cc1h":                   float64(0),
-	"prompt_tokens":          float64(0),
-	"completion_tokens":      float64(0),
-	"cache_read_tokens":      float64(0),
-	"cache_create_tokens":    float64(0),
-	"cache_create_1h_tokens": float64(0),
-	"img":                    float64(0),
-	"ai":                     float64(0),
-	"ao":                     float64(0),
-	"image_tokens":           float64(0),
-	"audio_input_tokens":     float64(0),
-	"audio_output_tokens":    float64(0),
+// compileEnvPrototypeV1 is the v1 type-checking prototype used at compile time.
+var compileEnvPrototypeV1 = map[string]interface{}{
+	"p":    float64(0),
+	"c":    float64(0),
+	"cr":   float64(0),
+	"cc":   float64(0),
+	"cc1h": float64(0),
+	"img":  float64(0),
+	"img_o": float64(0),
+	"ai":   float64(0),
+	"ao":   float64(0),
 	"tier":                   func(string, float64) float64 { return 0 },
 	"header":                 func(string) string { return "" },
 	"param":                  func(string) interface{} { return nil },
@@ -59,6 +64,13 @@ var compileEnvPrototype = map[string]interface{}{
 	"floor":                  math.Floor,
 }

+func getCompileEnv(version int) map[string]interface{} { // returns the type-checking environment to compile an expression of the given version against
+	switch version {
+	default: // only v1 is defined, so every version value currently resolves to the v1 prototype
+		return compileEnvPrototypeV1 // the package-level map itself is returned, not a copy
+	}
+}
+
 // CompileFromCache compiles an expression string, using a cached program when
 // available. The cache is keyed by the SHA-256 hex digest of the expression.
 func CompileFromCache(exprStr string) (*vm.Program, error) {
@@ -79,7 +91,8 @@ func compileFromCacheByHash(exprStr, hash string) (*vm.Program, error) {
 	}
 	cacheMu.RUnlock()

-	prog, err := expr.Compile(exprStr, expr.Env(compileEnvPrototype), expr.AsFloat64())
+	version, body := ParseExprVersion(exprStr)
+	prog, err := expr.Compile(body, expr.Env(getCompileEnv(version)), expr.AsFloat64())
 	if err != nil {
 		return nil, fmt.Errorf("expr compile error: %w", err)
 	}
@@ -90,12 +103,29 @@ func compileFromCacheByHash(exprStr, hash string) (*vm.Program, error) {
 	if len(cache) >= maxCacheSize {
 		cache = make(map[string]*cachedEntry, 64)
 	}
-	cache[hash] = &cachedEntry{prog: prog, usedVars: vars}
+	cache[hash] = &cachedEntry{prog: prog, usedVars: vars, version: version}
 	cacheMu.Unlock()

 	return prog, nil
 }

+// ExprVersion returns the version stored in exprStr's cache entry, or DefaultExprVersion when exprStr is empty.
+// On a cache miss the version is taken from the string's prefix via ParseExprVersion; nothing is compiled or cached here.
+func ExprVersion(exprStr string) int {
+	if exprStr == "" {
+		return DefaultExprVersion
+	}
+	hash := ExprHashString(exprStr) // key used for the cache lookup below
+	cacheMu.RLock()
+	if entry, ok := cache[hash]; ok {
+		cacheMu.RUnlock() // released before the return; entry.version is read after the lock is dropped
+		return entry.version
+	}
+	cacheMu.RUnlock()
+	v, _ := ParseExprVersion(exprStr) // the body is not needed, only the version
+	return v
+}
+
 func extractUsedVars(prog *vm.Program) map[string]bool {
 	vars := make(map[string]bool)
 	node := prog.Node()
@@ -0,0 +1,237 @@
+# Billing Expression System (billingexpr)
+
+## Design Philosophy
+
+**One expression, one truth.** A single expression string completely defines a model's billing logic — pricing, tier conditions, cache/image/audio differentiation, time-based discounts, request-aware multipliers — all in one line. No scattered configuration, no implicit rules, no magic numbers.
+
+The expression is the billing contract between the administrator and the system. What you write is what gets executed. The system's job is to evaluate it faithfully, not to interpret it.
+
+### Core Principles
+
+1. **Expression is self-contained** — The expression string alone determines billing. No external ratio tables, no implicit completion multipliers, no hidden conversion factors. Given the same token counts and request context, the same expression always produces the same cost.
+
+2. **Variables are opt-in** — `p` (prompt) and `c` (completion) are the base. Cache (`cr`, `cc`, `cc1h`), image (`img`, `img_o`), and audio (`ai`, `ao`) variables are optional. If a variable is omitted, its tokens stay in `p` (input side) or `c` (output side) and are priced at that base rate. The system automatically detects which variables the expression uses (via AST introspection) and adjusts token normalization accordingly.
+
+3. **Prices are real prices** — Expression coefficients are actual $/1M tokens prices as published by providers. No ratio conversion, no `/2` convention. `p * 2.5` means $2.50 per 1M prompt tokens.
+
+4. **Upstream-agnostic** — The expression doesn't need to know whether the upstream API is OpenAI-format (prompt_tokens includes cache) or Claude-format (input_tokens excludes cache). The system normalizes token counts before evaluation based on the upstream response format.
+
+5. **Version-aware** — Expressions may carry a version prefix (currently only `v1:`); an expression without a prefix is treated as v1. The version controls the compile environment, token normalization, and quota conversion formula, enabling future evolution without breaking existing expressions.
+
+---
+
+## Expression Language
+
+Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are compiled, cached, and evaluated against a runtime environment.
+
+### Token Variables
+
+**输入侧变量：**
+
+| 变量 | 含义 |
+|------|------|
+| `p` | 输入 token 数。**自动排除**表达式中单独计价的子类别（见下方说明） |
+| `cr` | 缓存命中（读取）token 数 |
+| `cc` | 缓存创建 token 数（Claude 5分钟 TTL / 通用） |
+| `cc1h` | 缓存创建 token 数 — 1小时 TTL（Claude 专用） |
+| `img` | 图片输入 token 数 |
+| `ai` | 音频输入 token 数 |
+
+**输出侧变量：**
+
+| 变量 | 含义 |
+|------|------|
+| `c` | 输出 token 数。**自动排除**表达式中单独计价的子类别（见下方说明） |
+| `img_o` | 图片输出 token 数 |
+| `ao` | 音频输出 token 数 |
+
+#### `p` 和 `c` 的自动排除机制
+
+`p` 和 `c` 是"兜底变量"——它们代表**所有没有被表达式单独定价的 token**。系统会根据表达式实际使用了哪些变量，自动从 `p` / `c` 中减去对应的子类别 token，避免重复计费。
+
+**规则：如果表达式使用了某个子类别变量，对应的 token 就从 `p` 或 `c` 中扣除；如果没使用，那些 token 就留在 `p` 或 `c` 里按基础价格计费。**
+
+举例说明（假设上游返回的原始数据：prompt_tokens=1000，其中包含 200 cache read、100 image）：
+
+| 表达式 | `p` 的值 | 说明 |
+|--------|---------|------|
+| `p * 3 + c * 15` | 1000 | 没用 `cr`/`img`，所以缓存和图片都包含在 `p` 里，全按 $3 计费 |
+| `p * 3 + c * 15 + cr * 0.3` | 800 | 用了 `cr`，缓存 200 从 `p` 中扣除，按 $0.3 单独计费；图片仍在 `p` 里按 $3 计费 |
+| `p * 3 + c * 15 + cr * 0.3 + img * 2` | 700 | 用了 `cr` 和 `img`，都从 `p` 中扣除，各自按自己的价格计费 |
+
+输出侧同理（假设 completion_tokens=500，其中包含 100 audio output）：
+
+| 表达式 | `c` 的值 | 说明 |
+|--------|---------|------|
+| `p * 3 + c * 15` | 500 | 没用 `ao`，音频输出包含在 `c` 里按 $15 计费 |
+| `p * 3 + c * 15 + ao * 50` | 400 | 用了 `ao`，音频 100 从 `c` 中扣除按 $50 计费 |
+
+> **注意：** 这个自动排除仅针对 GPT/OpenAI 格式的 API（prompt_tokens 包含所有子类别）。Claude 格式的 API（input_tokens 本身就只包含纯文本）不做任何减法。系统根据上游返回格式自动判断，表达式作者无需关心。
+
+### Built-in Functions
+
+| Function | Signature | Purpose |
+|----------|-----------|---------|
+| `tier` | `tier(name, value) → float64` | Records which pricing tier matched; must wrap the cost expression |
+| `param` | `param(path) → any` | Reads a JSON path from the request body (uses gjson) |
+| `header` | `header(key) → string` | Reads a request header value |
+| `has` | `has(source, substr) → bool` | Substring check |
+| `hour` | `hour(tz) → int` | Current hour in timezone (0-23) |
+| `minute` | `minute(tz) → int` | Current minute (0-59) |
+| `weekday` | `weekday(tz) → int` | Day of week (0=Sunday, 6=Saturday) |
+| `month` | `month(tz) → int` | Month (1-12) |
+| `day` | `day(tz) → int` | Day of month (1-31) |
+| `max` | `max(a, b) → float64` | Math max |
+| `min` | `min(a, b) → float64` | Math min |
+| `abs` | `abs(x) → float64` | Absolute value |
+| `ceil` | `ceil(x) → float64` | Ceiling |
+| `floor` | `floor(x) → float64` | Floor |
+
+### Expression Examples
+
+```
+# Simple flat pricing
+tier("base", p * 2.5 + c * 15 + cr * 0.25)
+
+# Multi-tier (Claude Sonnet style)
+p <= 200000
+  ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
+  : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
+
+# Image model (no separate cache/audio pricing — those tokens stay in p/c)
+tier("base", p * 2 + c * 8 + img * 2.5)
+
+# Multimodal with audio
+tier("base", p * 0.43 + c * 3.06 + img * 0.78 + ai * 3.81 + ao * 15.11)
+```
+
+### Request Rules (appended after `|||`)
+
+Request-conditional multipliers are appended to the expression after a `|||` separator:
+
+```
+tier("base", p * 5 + c * 25)|||when(header("anthropic-beta") has "fast-mode") * 6
+```
+
+These are parsed and applied separately by the request rule system.
+
+---
+
+## Architecture
+
+### Data Flow
+
+```
+Frontend Editor → Storage → Pre-consume → Settlement → Log Display
+```
+
+### 1. Frontend Editor
+
+**File**: `web/src/pages/Setting/Ratio/components/TieredPricingEditor.jsx`
+
+Two editing modes:
+- **Visual mode**: Fill in prices per variable, conditions per tier. Generates expression via `generateExprFromVisualConfig()`.
+- **Raw mode**: Edit the expression string directly. Includes preset templates for common models.
+
+The editor outputs a billing expression string and an optional request rule expression string. These are combined via `combineBillingExpr(billingExpr, requestRuleExpr)` before storage.
+
+### 2. Storage
+
+**File**: `setting/billing_setting/tiered_billing.go`
+
+Two option maps stored in the `options` DB table:
+- `ModelBillingMode`: `{ "model-name": "tiered_expr" }` — activates tiered billing for a model
+- `ModelBillingExpr`: `{ "model-name": "tier(\"base\", p * 2.5 + c * 15)" }` — the expression
+
+On save, the expression is validated:
+1. Compiled via `billingexpr.CompileFromCache()` — syntax check
+2. Smoke-tested with sample token vectors — ensures non-negative results
+
+### 3. Pre-consume (Quota Estimation)
+
+**File**: `relay/helper/price.go` → `modelPriceHelperTiered()`
+
+When a request arrives and the model uses `tiered_expr` billing:
+1. Loads expression from `billing_setting.GetBillingExpr()`
+2. Builds `RequestInput` (headers + body) for `param()` / `header()` functions
+3. Runs expression with estimated tokens: `RunExprWithRequest(expr, {P, C}, requestInput)`
+4. Converts output to quota: `rawCost / 1,000,000 * QuotaPerUnit`
+5. Creates `BillingSnapshot` (frozen state for settlement) and stores on `RelayInfo`
+
+### 4. Settlement (Actual Billing)
+
+**Files**: `service/tiered_settle.go`, `pkg/billingexpr/settle.go`
+
+After the upstream response returns with actual token usage:
+
+1. `BuildTieredTokenParams(usage, isClaudeUsageSemantic, usedVars)`:
+   - Reads actual token counts from `dto.Usage`
+   - For GPT-format APIs (prompt_tokens includes everything): subtracts sub-categories from P/C **only when** the expression uses their variables (detected via AST introspection of the compiled expression)
+   - For Claude-format APIs (input_tokens is text-only): no adjustment needed
+
+2. `TryTieredSettle(relayInfo, params)`:
+   - Uses the frozen `BillingSnapshot` from pre-consume
+   - Re-runs the expression with actual token counts
+   - Converts via `quotaConversion()` (version-dispatched)
+   - Returns actual quota
+
+### 5. Log Display
+
+**Files**: `service/log_info_generate.go`, `web/src/helpers/render.jsx`
+
+Backend: `InjectTieredBillingInfo()` adds `billing_mode`, `expr_b64` (base64 expression), and `matched_tier` to the log's `other` JSON.
+
+Frontend: Detects `billing_mode === "tiered_expr"`, decodes `expr_b64`, parses tiers via shared `parseTiersFromExpr()`, and renders pricing breakdown.
+
+---
+
+## Key Design Decisions
+
+### Token Normalization via AST Introspection
+
+Different upstream APIs report `prompt_tokens` differently:
+- **OpenAI/GPT**: `prompt_tokens` = total (text + cache + image + audio)
+- **Claude**: `input_tokens` = text only (cache reported separately)
+
+The system normalizes `p` to mean "tokens not separately priced" by subtracting sub-categories **only when the expression references them**. This is determined by walking the compiled AST to find `IdentifierNode` references. The resulting set of variable names is stored in the cache alongside the compiled program, so the walk happens once per cached expression rather than on every request.
+
+Example: `p * 2.5 + c * 15 + cr * 0.25`
+- Expression uses `cr` → cache read tokens subtracted from `p`
+- Expression doesn't use `img` → image tokens stay in `p`, priced at $2.50
+
+### Quota Conversion
+
+Expression coefficients are $/1M tokens. Conversion to internal quota:
+
+```
+quota = exprOutput / 1,000,000 * QuotaPerUnit * groupRatio
+```
+
+This matches the per-call billing pattern: `quota = modelPrice * QuotaPerUnit * groupRatio`.
+
+### Expression Versioning
+
+Expressions can carry a version prefix: `v1:tier(...)`. No prefix = v1.
+
+Version controls:
+- Compile environment (available variables and functions)
+- Token normalization logic
+- Quota conversion formula
+
+This enables future evolution without breaking existing expressions.
+
+---
+
+## File Map
+
+| Layer | Files |
+|-------|-------|
+| Expression engine | `pkg/billingexpr/compile.go`, `run.go`, `settle.go`, `round.go`, `types.go` |
+| Storage | `setting/billing_setting/tiered_billing.go` |
+| Pre-consume | `relay/helper/price.go`, `relay/helper/billing_expr_request.go` |
+| Settlement | `service/tiered_settle.go`, `service/quota.go` |
+| Log injection | `service/log_info_generate.go` |
+| Frontend editor | `web/src/pages/Setting/Ratio/components/TieredPricingEditor.jsx` |
+| Frontend display | `web/src/helpers/render.jsx`, `web/src/helpers/utils.jsx` |
+| Model detail | `web/src/components/table/model-pricing/modal/components/DynamicPricingBreakdown.jsx` |
+| Log display | `web/src/hooks/usage-logs/useUsageLogsData.jsx`, `web/src/components/table/usage-logs/UsageLogsColumnDefs.jsx` |
@@ -52,22 +52,15 @@ func runProgram(prog *vm.Program, params TokenParams, request RequestInput) (flo
 	headers := normalizeHeaders(request.Headers)

 	env := map[string]interface{}{
-		"p":                      params.P,
-		"c":                      params.C,
-		"cr":                     params.CR,
-		"cc":                     params.CC,
-		"cc1h":                   params.CC1h,
-		"prompt_tokens":          params.P,
-		"completion_tokens":      params.C,
-		"cache_read_tokens":      params.CR,
-		"cache_create_tokens":    params.CC,
-		"cache_create_1h_tokens": params.CC1h,
-		"img":                    params.Img,
-		"ai":                     params.AI,
-		"ao":                     params.AO,
-		"image_tokens":           params.Img,
-		"audio_input_tokens":     params.AI,
-		"audio_output_tokens":    params.AO,
+		"p":    params.P,
+		"c":    params.C,
+		"cr":   params.CR,
+		"cc":   params.CC,
+		"cc1h": params.CC1h,
+		"img":  params.Img,
+		"img_o": params.ImgO,
+		"ai":   params.AI,
+		"ao":   params.AO,
 		"tier": func(name string, value float64) float64 {
 			trace.MatchedTier = name
 			trace.Cost = value
@@ -1,5 +1,15 @@
 package billingexpr

+// quotaConversion converts raw expression output to quota, before any group
+// ratio is applied, using the formula selected by snap.ExprVersion. It is the central
+// dispatch point for future versions that may use a different formula; snap is dereferenced and must be non-nil.
+func quotaConversion(exprOutput float64, snap *BillingSnapshot) float64 {
+	switch snap.ExprVersion {
+	default: // v1 (and, for now, every other value): coefficients are $/1M tokens prices
+		return exprOutput / 1_000_000 * snap.QuotaPerUnit
+	}
+}
+
 // ComputeTieredQuota runs the Expr from a frozen BillingSnapshot against
 // actual token counts and returns the settlement result.
 func ComputeTieredQuota(snap *BillingSnapshot, params TokenParams) (TieredResult, error) {
@@ -12,7 +22,7 @@ func ComputeTieredQuotaWithRequest(snap *BillingSnapshot, params TokenParams, re
 		return TieredResult{}, err
 	}

-	quotaBeforeGroup := cost / 1_000_000 * snap.QuotaPerUnit
+	quotaBeforeGroup := quotaConversion(cost, snap)
 	afterGroup := QuotaRound(quotaBeforeGroup * snap.GroupRatio)
 	crossed := trace.MatchedTier != snap.EstimatedTier

@@ -20,6 +20,7 @@ type TokenParams struct {
 	CC   float64 // cache creation tokens (5-min TTL for Claude, generic for others)
 	CC1h float64 // cache creation tokens — 1-hour TTL (Claude only)
 	Img  float64 // image input tokens
+	ImgO float64 // image output tokens
 	AI   float64 // audio input tokens
 	AO   float64 // audio output tokens
 }
@@ -46,6 +47,7 @@ type BillingSnapshot struct {
 	EstimatedQuotaAfterGroup  int     `json:"estimated_quota_after_group"`
 	EstimatedTier             string  `json:"estimated_tier"`
 	QuotaPerUnit              float64 `json:"quota_per_unit"`
+	ExprVersion               int     `json:"expr_version"`
 }

 // TieredResult holds everything needed after running tiered settlement.