feat: add len variable for tier conditions and LLM prompt helper
This commit is contained in:
@@ -1000,11 +1000,82 @@ func TestImageAudioZero(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// len variable tests — tier conditions based on context length
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// lenTieredExpr is a two-tier pricing expression whose tier condition switches
// on len (total input context length) instead of p, so that sub-category
// exclusion (cache read subtracted from p) cannot demote the matched tier.
const lenTieredExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6)`
|
||||
|
||||
func TestLen_StandardTier(t *testing.T) {
|
||||
params := billingexpr.TokenParams{P: 80000, C: 5000, Len: 100000, CR: 20000}
|
||||
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
want := 80000*3 + 5000*15 + 20000*0.3
|
||||
if math.Abs(cost-want) > 1e-6 {
|
||||
t.Errorf("cost = %f, want %f", cost, want)
|
||||
}
|
||||
if trace.MatchedTier != "standard" {
|
||||
t.Errorf("tier = %q, want standard", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_LongContextTier(t *testing.T) {
|
||||
// p is low (cache subtracted), but len is high (full context)
|
||||
params := billingexpr.TokenParams{P: 50000, C: 5000, Len: 300000, CR: 250000}
|
||||
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
want := 50000*6 + 5000*22.5 + 250000*0.6
|
||||
if math.Abs(cost-want) > 1e-6 {
|
||||
t.Errorf("cost = %f, want %f", cost, want)
|
||||
}
|
||||
if trace.MatchedTier != "long_context" {
|
||||
t.Errorf("tier = %q, want long_context (len=300000 > 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_BoundaryExact(t *testing.T) {
|
||||
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200000, CR: 100000}
|
||||
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if trace.MatchedTier != "standard" {
|
||||
t.Errorf("tier = %q, want standard (len=200000 <= 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_BoundaryPlusOne(t *testing.T) {
|
||||
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200001, CR: 100001}
|
||||
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if trace.MatchedTier != "long_context" {
|
||||
t.Errorf("tier = %q, want long_context (len=200001 > 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_ZeroDefaultsToZero(t *testing.T) {
|
||||
// len defaults to 0 when not set
|
||||
params := billingexpr.TokenParams{P: 1000, C: 500}
|
||||
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if trace.MatchedTier != "standard" {
|
||||
t.Errorf("tier = %q, want standard (len=0 <= 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmarks: compile vs cached execution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const benchComplexExpr = `p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
|
||||
const benchComplexExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
|
||||
|
||||
func BenchmarkExprCompile(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
@@ -1015,7 +1086,7 @@ func BenchmarkExprCompile(b *testing.B) {
|
||||
|
||||
func BenchmarkExprRunCached(b *testing.B) {
|
||||
billingexpr.CompileFromCache(benchComplexExpr)
|
||||
params := billingexpr.TokenParams{P: 150000, C: 10000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
|
||||
params := billingexpr.TokenParams{P: 150000, C: 10000, Len: 188000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
billingexpr.RunExpr(benchComplexExpr, params)
|
||||
|
||||
@@ -41,6 +41,7 @@ var (
|
||||
var compileEnvPrototypeV1 = map[string]interface{}{
|
||||
"p": float64(0),
|
||||
"c": float64(0),
|
||||
"len": float64(0),
|
||||
"cr": float64(0),
|
||||
"cc": float64(0),
|
||||
"cc1h": float64(0),
|
||||
|
||||
+16
-3
@@ -30,7 +30,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
||||
|
||||
| 变量 | 含义 |
|
||||
|------|------|
|
||||
| `p` | 输入 token 数(**计价用**)。**自动排除**表达式中单独计价的子类别(见下方说明) |
|
||||
| `len` | 输入上下文总长度(**条件判断用**)。不受自动排除影响,始终反映完整输入长度。非 Claude:等于原始 `prompt_tokens`;Claude:等于文本输入 + 缓存读取 + 缓存创建 |
|
||||
| `cr` | 缓存命中(读取)token 数 |
|
||||
| `cc` | 缓存创建 token 数(Claude 5分钟 TTL / 通用) |
|
||||
| `cc1h` | 缓存创建 token 数 — 1小时 TTL(Claude 专用) |
|
||||
@@ -51,6 +52,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
||||
|
||||
**规则:如果表达式使用了某个子类别变量,对应的 token 就从 `p` 或 `c` 中扣除;如果没使用,那些 token 就留在 `p` 或 `c` 里按基础价格计费。**
|
||||
|
||||
> **重要:`len` 不受自动排除影响。** `len` 始终代表完整的输入上下文长度,不管表达式是否单独对缓存/图片/音频定价。因此**阶梯条件应使用 `len` 而非 `p`**,以避免缓存命中导致 `p` 降低而误判档位。
|
||||
|
||||
举例说明(假设上游返回的原始数据:prompt_tokens=1000,其中包含 200 cache read、100 image):
|
||||
|
||||
| 表达式 | `p` 的值 | 说明 |
|
||||
@@ -93,8 +96,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
||||
# Simple flat pricing
|
||||
tier("base", p * 2.5 + c * 15 + cr * 0.25)
|
||||
|
||||
# Multi-tier (Claude Sonnet style) — use len for tier conditions
len <= 200000
|
||||
? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
|
||||
: tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
|
||||
|
||||
@@ -199,6 +202,16 @@ Example: `p * 2.5 + c * 15 + cr * 0.25`
|
||||
- Expression uses `cr` → cache read tokens subtracted from `p`
|
||||
- Expression doesn't use `img` → image tokens stay in `p`, priced at $2.50
|
||||
|
||||
### `len` — Context Length Variable
|
||||
|
||||
`len` represents the total input context length, designed for **tier condition evaluation** (e.g. `len <= 200000 ? ...`). Unlike `p`, `len` is never reduced by sub-category exclusion.
|
||||
|
||||
**Computation rules:**
|
||||
- **Non-Claude (GPT/OpenAI format)**: `len = prompt_tokens` (the raw total from the upstream response)
|
||||
- **Claude format**: `len = input_tokens + cache_read_tokens + cache_creation_tokens` (since Claude's `input_tokens` is text-only, cache must be added back to reflect full context length)
|
||||
|
||||
This ensures that heavy cache usage doesn't cause the tier condition to incorrectly evaluate to a lower tier. For example, if a request has 300K total context but 250K is cached, `p` with cache subtracted would be only 50K (standard tier), while `len` correctly reports 300K (long-context tier).
|
||||
|
||||
### Quota Conversion
|
||||
|
||||
Expression coefficients are $/1M tokens. Conversion to internal quota:
|
||||
|
||||
@@ -13,7 +13,8 @@ import (
|
||||
|
||||
// RunExpr compiles (with cache) and executes an expression string.
|
||||
// The environment exposes:
|
||||
// - p, c — prompt / completion tokens (auto-excluding separately-priced sub-categories)
|
||||
// - len — total input context length for tier conditions (never reduced by sub-category exclusion)
|
||||
// - cr, cc, cc1h — cache read / creation / creation-1h tokens
|
||||
// - tier(name, value) — trace callback that records which tier matched
|
||||
// - max, min, abs, ceil, floor — standard math helpers
|
||||
@@ -54,6 +55,7 @@ func runProgram(prog *vm.Program, params TokenParams, request RequestInput) (flo
|
||||
env := map[string]interface{}{
|
||||
"p": params.P,
|
||||
"c": params.C,
|
||||
"len": params.Len,
|
||||
"cr": params.CR,
|
||||
"cc": params.CC,
|
||||
"cc1h": params.CC1h,
|
||||
|
||||
@@ -14,8 +14,9 @@ type RequestInput struct {
|
||||
// Fields beyond P and C are optional — when absent they default to 0,
|
||||
// which means cache-unaware expressions keep working unchanged.
|
||||
type TokenParams struct {
|
||||
P float64 // prompt tokens (text)
|
||||
C float64 // completion tokens (text)
|
||||
P float64 // prompt tokens (text) — auto-excludes sub-categories priced separately
|
||||
C float64 // completion tokens (text) — auto-excludes sub-categories priced separately
|
||||
Len float64 // total input context length for tier conditions (non-Claude: raw prompt_tokens; Claude: text + cache read + cache creation)
|
||||
CR float64 // cache read (hit) tokens
|
||||
CC float64 // cache creation tokens (5-min TTL for Claude, generic for others)
|
||||
CC1h float64 // cache creation tokens — 1-hour TTL (Claude only)
|
||||
|
||||
Reference in New Issue
Block a user