feat: add len variable for tier conditions and LLM prompt helper

This commit is contained in:
CaIon
2026-04-25 13:24:20 +08:00
parent a7c38ec851
commit f2f3410dcf
15 changed files with 393 additions and 47 deletions
+73 -2
View File
@@ -1000,11 +1000,82 @@ func TestImageAudioZero(t *testing.T) {
}
}
// ---------------------------------------------------------------------------
// len variable tests — tier conditions based on context length
// ---------------------------------------------------------------------------
const lenTieredExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6)`
func TestLen_StandardTier(t *testing.T) {
params := billingexpr.TokenParams{P: 80000, C: 5000, Len: 100000, CR: 20000}
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
if err != nil {
t.Fatal(err)
}
want := 80000*3 + 5000*15 + 20000*0.3
if math.Abs(cost-want) > 1e-6 {
t.Errorf("cost = %f, want %f", cost, want)
}
if trace.MatchedTier != "standard" {
t.Errorf("tier = %q, want standard", trace.MatchedTier)
}
}
func TestLen_LongContextTier(t *testing.T) {
// p is low (cache subtracted), but len is high (full context)
params := billingexpr.TokenParams{P: 50000, C: 5000, Len: 300000, CR: 250000}
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
if err != nil {
t.Fatal(err)
}
want := 50000*6 + 5000*22.5 + 250000*0.6
if math.Abs(cost-want) > 1e-6 {
t.Errorf("cost = %f, want %f", cost, want)
}
if trace.MatchedTier != "long_context" {
t.Errorf("tier = %q, want long_context (len=300000 > 200000)", trace.MatchedTier)
}
}
func TestLen_BoundaryExact(t *testing.T) {
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200000, CR: 100000}
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
if err != nil {
t.Fatal(err)
}
if trace.MatchedTier != "standard" {
t.Errorf("tier = %q, want standard (len=200000 <= 200000)", trace.MatchedTier)
}
}
func TestLen_BoundaryPlusOne(t *testing.T) {
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200001, CR: 100001}
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
if err != nil {
t.Fatal(err)
}
if trace.MatchedTier != "long_context" {
t.Errorf("tier = %q, want long_context (len=200001 > 200000)", trace.MatchedTier)
}
}
func TestLen_ZeroDefaultsToZero(t *testing.T) {
// len defaults to 0 when not set
params := billingexpr.TokenParams{P: 1000, C: 500}
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
if err != nil {
t.Fatal(err)
}
if trace.MatchedTier != "standard" {
t.Errorf("tier = %q, want standard (len=0 <= 200000)", trace.MatchedTier)
}
}
// ---------------------------------------------------------------------------
// Benchmarks: compile vs cached execution
// ---------------------------------------------------------------------------
const benchComplexExpr = `p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
const benchComplexExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
func BenchmarkExprCompile(b *testing.B) {
for i := 0; i < b.N; i++ {
@@ -1015,7 +1086,7 @@ func BenchmarkExprCompile(b *testing.B) {
func BenchmarkExprRunCached(b *testing.B) {
billingexpr.CompileFromCache(benchComplexExpr)
params := billingexpr.TokenParams{P: 150000, C: 10000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
params := billingexpr.TokenParams{P: 150000, C: 10000, Len: 188000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
b.ResetTimer()
for i := 0; i < b.N; i++ {
billingexpr.RunExpr(benchComplexExpr, params)
+1
View File
@@ -41,6 +41,7 @@ var (
var compileEnvPrototypeV1 = map[string]interface{}{
"p": float64(0),
"c": float64(0),
"len": float64(0),
"cr": float64(0),
"cc": float64(0),
"cc1h": float64(0),
+16 -3
View File
@@ -30,7 +30,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
| 变量 | 含义 |
|------|------|
| `p` | 输入 token 数。**自动排除**表达式中单独计价的子类别(见下方说明) |
| `p` | 输入 token 数(**计价用**)。**自动排除**表达式中单独计价的子类别(见下方说明) |
| `len` | 输入上下文总长度(**条件判断用**)。不受自动排除影响,始终反映完整输入长度。非 Claude:等于原始 `prompt_tokens`;Claude:等于文本输入 + 缓存读取 + 缓存创建 |
| `cr` | 缓存命中(读取)token 数 |
| `cc` | 缓存创建 token 数(Claude 5分钟 TTL / 通用) |
| `cc1h` | 缓存创建 token 数 — 1小时 TTL(Claude 专用) |
@@ -51,6 +52,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
**规则:如果表达式使用了某个子类别变量,对应的 token 就从 `p` 或 `c` 中扣除;如果没使用,那些 token 就留在 `p` 或 `c` 里按基础价格计费。**
> **重要:`len` 不受自动排除影响。** `len` 始终代表完整的输入上下文长度,不管表达式是否单独对缓存/图片/音频定价。因此**阶梯条件应使用 `len` 而非 `p`**,以避免缓存命中导致 `p` 降低而误判档位。
举例说明(假设上游返回的原始数据:prompt_tokens=1000,其中包含 200 cache read、100 image):
| 表达式 | `p` 的值 | 说明 |
@@ -93,8 +96,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
# Simple flat pricing
tier("base", p * 2.5 + c * 15 + cr * 0.25)
# Multi-tier (Claude Sonnet style)
p <= 200000
# Multi-tier (Claude Sonnet style) — use len for tier conditions
len <= 200000
? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
: tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
@@ -199,6 +202,16 @@ Example: `p * 2.5 + c * 15 + cr * 0.25`
- Expression uses `cr` → cache read tokens subtracted from `p`
- Expression doesn't use `img` → image tokens stay in `p`, priced at $2.50
### `len` — Context Length Variable
`len` represents the total input context length, designed for **tier condition evaluation** (e.g. `len <= 200000 ? ...`). Unlike `p`, `len` is never reduced by sub-category exclusion.
**Computation rules:**
- **Non-Claude (GPT/OpenAI format)**: `len = prompt_tokens` (the raw total from the upstream response)
- **Claude format**: `len = input_tokens + cache_read_tokens + cache_creation_tokens` (since Claude's `input_tokens` is text-only, cache must be added back to reflect full context length)
This ensures that heavy cache usage doesn't cause the tier condition to incorrectly evaluate to a lower tier. For example, if a request has 300K total context but 250K is cached, `p` with cache subtracted would be only 50K (standard tier), while `len` correctly reports 300K (long-context tier).
### Quota Conversion
Expression coefficients are $/1M tokens. Conversion to internal quota:
+3 -1
View File
@@ -13,7 +13,8 @@ import (
// RunExpr compiles (with cache) and executes an expression string.
// The environment exposes:
// - p, c — prompt / completion tokens
// - p, c — prompt / completion tokens (auto-excluding separately-priced sub-categories)
// - len — total input context length for tier conditions (never reduced by sub-category exclusion)
// - cr, cc, cc1h — cache read / creation / creation-1h tokens
// - tier(name, value) — trace callback that records which tier matched
// - max, min, abs, ceil, floor — standard math helpers
@@ -54,6 +55,7 @@ func runProgram(prog *vm.Program, params TokenParams, request RequestInput) (flo
env := map[string]interface{}{
"p": params.P,
"c": params.C,
"len": params.Len,
"cr": params.CR,
"cc": params.CC,
"cc1h": params.CC1h,
+3 -2
View File
@@ -14,8 +14,9 @@ type RequestInput struct {
// Fields beyond P and C are optional — when absent they default to 0,
// which means cache-unaware expressions keep working unchanged.
type TokenParams struct {
P float64 // prompt tokens (text)
C float64 // completion tokens (text)
P float64 // prompt tokens (text) — auto-excludes sub-categories priced separately
C float64 // completion tokens (text) — auto-excludes sub-categories priced separately
Len float64 // total input context length for tier conditions (non-Claude: raw prompt_tokens; Claude: text + cache read + cache creation)
CR float64 // cache read (hit) tokens
CC float64 // cache creation tokens (5-min TTL for Claude, generic for others)
CC1h float64 // cache creation tokens — 1-hour TTL (Claude only)