feat: add len variable for tier conditions and LLM prompt helper
This commit is contained in:
@@ -1000,11 +1000,82 @@ func TestImageAudioZero(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// len variable tests — tier conditions based on context length
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// lenTieredExpr is a two-tier pricing expression whose tier condition switches
// on len (total input context length) instead of p, so that sub-category
// exclusion (cache read subtracted from p) cannot demote the matched tier.
const lenTieredExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6)`
|
||||
|
||||
func TestLen_StandardTier(t *testing.T) {
|
||||
params := billingexpr.TokenParams{P: 80000, C: 5000, Len: 100000, CR: 20000}
|
||||
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
want := 80000*3 + 5000*15 + 20000*0.3
|
||||
if math.Abs(cost-want) > 1e-6 {
|
||||
t.Errorf("cost = %f, want %f", cost, want)
|
||||
}
|
||||
if trace.MatchedTier != "standard" {
|
||||
t.Errorf("tier = %q, want standard", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_LongContextTier(t *testing.T) {
|
||||
// p is low (cache subtracted), but len is high (full context)
|
||||
params := billingexpr.TokenParams{P: 50000, C: 5000, Len: 300000, CR: 250000}
|
||||
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
want := 50000*6 + 5000*22.5 + 250000*0.6
|
||||
if math.Abs(cost-want) > 1e-6 {
|
||||
t.Errorf("cost = %f, want %f", cost, want)
|
||||
}
|
||||
if trace.MatchedTier != "long_context" {
|
||||
t.Errorf("tier = %q, want long_context (len=300000 > 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_BoundaryExact(t *testing.T) {
|
||||
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200000, CR: 100000}
|
||||
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if trace.MatchedTier != "standard" {
|
||||
t.Errorf("tier = %q, want standard (len=200000 <= 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_BoundaryPlusOne(t *testing.T) {
|
||||
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200001, CR: 100001}
|
||||
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if trace.MatchedTier != "long_context" {
|
||||
t.Errorf("tier = %q, want long_context (len=200001 > 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLen_ZeroDefaultsToZero(t *testing.T) {
|
||||
// len defaults to 0 when not set
|
||||
params := billingexpr.TokenParams{P: 1000, C: 500}
|
||||
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if trace.MatchedTier != "standard" {
|
||||
t.Errorf("tier = %q, want standard (len=0 <= 200000)", trace.MatchedTier)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmarks: compile vs cached execution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const benchComplexExpr = `p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
|
||||
const benchComplexExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
|
||||
|
||||
func BenchmarkExprCompile(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
@@ -1015,7 +1086,7 @@ func BenchmarkExprCompile(b *testing.B) {
|
||||
|
||||
func BenchmarkExprRunCached(b *testing.B) {
|
||||
billingexpr.CompileFromCache(benchComplexExpr)
|
||||
params := billingexpr.TokenParams{P: 150000, C: 10000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
|
||||
params := billingexpr.TokenParams{P: 150000, C: 10000, Len: 188000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
billingexpr.RunExpr(benchComplexExpr, params)
|
||||
|
||||
@@ -41,6 +41,7 @@ var (
|
||||
var compileEnvPrototypeV1 = map[string]interface{}{
|
||||
"p": float64(0),
|
||||
"c": float64(0),
|
||||
"len": float64(0),
|
||||
"cr": float64(0),
|
||||
"cc": float64(0),
|
||||
"cc1h": float64(0),
|
||||
|
||||
+16
-3
@@ -30,7 +30,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
||||
|
||||
| 变量 | 含义 |
|
||||
|------|------|
|
||||
| `p` | 输入 token 数(**计价用**)。**自动排除**表达式中单独计价的子类别(见下方说明) |
|
||||
| `len` | 输入上下文总长度(**条件判断用**)。不受自动排除影响,始终反映完整输入长度。非 Claude:等于原始 `prompt_tokens`;Claude:等于文本输入 + 缓存读取 + 缓存创建 |
|
||||
| `cr` | 缓存命中(读取)token 数 |
|
||||
| `cc` | 缓存创建 token 数(Claude 5分钟 TTL / 通用) |
|
||||
| `cc1h` | 缓存创建 token 数 — 1小时 TTL(Claude 专用) |
|
||||
@@ -51,6 +52,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
||||
|
||||
**规则:如果表达式使用了某个子类别变量,对应的 token 就从 `p` 或 `c` 中扣除;如果没使用,那些 token 就留在 `p` 或 `c` 里按基础价格计费。**
|
||||
|
||||
> **重要:`len` 不受自动排除影响。** `len` 始终代表完整的输入上下文长度,不管表达式是否单独对缓存/图片/音频定价。因此**阶梯条件应使用 `len` 而非 `p`**,以避免缓存命中导致 `p` 降低而误判档位。
|
||||
|
||||
举例说明(假设上游返回的原始数据:prompt_tokens=1000,其中包含 200 cache read、100 image):
|
||||
|
||||
| 表达式 | `p` 的值 | 说明 |
|
||||
@@ -93,8 +96,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
||||
# Simple flat pricing
|
||||
tier("base", p * 2.5 + c * 15 + cr * 0.25)
|
||||
|
||||
# Multi-tier (Claude Sonnet style) — use len for tier conditions
len <= 200000
|
||||
? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
|
||||
: tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
|
||||
|
||||
@@ -199,6 +202,16 @@ Example: `p * 2.5 + c * 15 + cr * 0.25`
|
||||
- Expression uses `cr` → cache read tokens subtracted from `p`
|
||||
- Expression doesn't use `img` → image tokens stay in `p`, priced at $2.50
|
||||
|
||||
### `len` — Context Length Variable
|
||||
|
||||
`len` represents the total input context length, designed for **tier condition evaluation** (e.g. `len <= 200000 ? ...`). Unlike `p`, `len` is never reduced by sub-category exclusion.
|
||||
|
||||
**Computation rules:**
|
||||
- **Non-Claude (GPT/OpenAI format)**: `len = prompt_tokens` (the raw total from the upstream response)
|
||||
- **Claude format**: `len = input_tokens + cache_read_tokens + cache_creation_tokens` (since Claude's `input_tokens` is text-only, cache must be added back to reflect full context length)
|
||||
|
||||
This ensures that heavy cache usage doesn't cause the tier condition to incorrectly evaluate to a lower tier. For example, if a request has 300K total context but 250K is cached, `p` with cache subtracted would be only 50K (standard tier), while `len` correctly reports 300K (long-context tier).
|
||||
|
||||
### Quota Conversion
|
||||
|
||||
Expression coefficients are $/1M tokens. Conversion to internal quota:
|
||||
|
||||
@@ -13,7 +13,8 @@ import (
|
||||
|
||||
// RunExpr compiles (with cache) and executes an expression string.
|
||||
// The environment exposes:
|
||||
// - p, c — prompt / completion tokens (auto-excluding separately-priced sub-categories)
|
||||
// - len — total input context length for tier conditions (never reduced by sub-category exclusion)
|
||||
// - cr, cc, cc1h — cache read / creation / creation-1h tokens
|
||||
// - tier(name, value) — trace callback that records which tier matched
|
||||
// - max, min, abs, ceil, floor — standard math helpers
|
||||
@@ -54,6 +55,7 @@ func runProgram(prog *vm.Program, params TokenParams, request RequestInput) (flo
|
||||
env := map[string]interface{}{
|
||||
"p": params.P,
|
||||
"c": params.C,
|
||||
"len": params.Len,
|
||||
"cr": params.CR,
|
||||
"cc": params.CC,
|
||||
"cc1h": params.CC1h,
|
||||
|
||||
@@ -14,8 +14,9 @@ type RequestInput struct {
|
||||
// Fields beyond P and C are optional — when absent they default to 0,
|
||||
// which means cache-unaware expressions keep working unchanged.
|
||||
type TokenParams struct {
|
||||
P float64 // prompt tokens (text)
|
||||
C float64 // completion tokens (text)
|
||||
P float64 // prompt tokens (text) — auto-excludes sub-categories priced separately
|
||||
C float64 // completion tokens (text) — auto-excludes sub-categories priced separately
|
||||
Len float64 // total input context length for tier conditions (non-Claude: raw prompt_tokens; Claude: text + cache read + cache creation)
|
||||
CR float64 // cache read (hit) tokens
|
||||
CC float64 // cache creation tokens (5-min TTL for Claude, generic for others)
|
||||
CC1h float64 // cache creation tokens — 1-hour TTL (Claude only)
|
||||
|
||||
Reference in New Issue
Block a user