feat: add len variable for tier conditions and LLM prompt helper
This commit is contained in:
@@ -1000,11 +1000,82 @@ func TestImageAudioZero(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// len variable tests — tier conditions based on context length
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// lenTieredExpr is a two-tier billing expression whose condition is keyed on
// len rather than p: when len <= 200000 it yields the "standard" tier
// (p*3 + c*15 + cr*0.3), otherwise the "long_context" tier
// (p*6 + c*22.5 + cr*0.6).
const lenTieredExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6)`
|
||||||
|
|
||||||
|
func TestLen_StandardTier(t *testing.T) {
|
||||||
|
params := billingexpr.TokenParams{P: 80000, C: 5000, Len: 100000, CR: 20000}
|
||||||
|
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
want := 80000*3 + 5000*15 + 20000*0.3
|
||||||
|
if math.Abs(cost-want) > 1e-6 {
|
||||||
|
t.Errorf("cost = %f, want %f", cost, want)
|
||||||
|
}
|
||||||
|
if trace.MatchedTier != "standard" {
|
||||||
|
t.Errorf("tier = %q, want standard", trace.MatchedTier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLen_LongContextTier(t *testing.T) {
|
||||||
|
// p is low (cache subtracted), but len is high (full context)
|
||||||
|
params := billingexpr.TokenParams{P: 50000, C: 5000, Len: 300000, CR: 250000}
|
||||||
|
cost, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
want := 50000*6 + 5000*22.5 + 250000*0.6
|
||||||
|
if math.Abs(cost-want) > 1e-6 {
|
||||||
|
t.Errorf("cost = %f, want %f", cost, want)
|
||||||
|
}
|
||||||
|
if trace.MatchedTier != "long_context" {
|
||||||
|
t.Errorf("tier = %q, want long_context (len=300000 > 200000)", trace.MatchedTier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLen_BoundaryExact(t *testing.T) {
|
||||||
|
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200000, CR: 100000}
|
||||||
|
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if trace.MatchedTier != "standard" {
|
||||||
|
t.Errorf("tier = %q, want standard (len=200000 <= 200000)", trace.MatchedTier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLen_BoundaryPlusOne(t *testing.T) {
|
||||||
|
params := billingexpr.TokenParams{P: 100000, C: 1000, Len: 200001, CR: 100001}
|
||||||
|
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if trace.MatchedTier != "long_context" {
|
||||||
|
t.Errorf("tier = %q, want long_context (len=200001 > 200000)", trace.MatchedTier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLen_ZeroDefaultsToZero(t *testing.T) {
|
||||||
|
// len defaults to 0 when not set
|
||||||
|
params := billingexpr.TokenParams{P: 1000, C: 500}
|
||||||
|
_, trace, err := billingexpr.RunExpr(lenTieredExpr, params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if trace.MatchedTier != "standard" {
|
||||||
|
t.Errorf("tier = %q, want standard (len=0 <= 200000)", trace.MatchedTier)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Benchmarks: compile vs cached execution
|
// Benchmarks: compile vs cached execution
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
const benchComplexExpr = `p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
|
const benchComplexExpr = `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
|
||||||
|
|
||||||
func BenchmarkExprCompile(b *testing.B) {
|
func BenchmarkExprCompile(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
@@ -1015,7 +1086,7 @@ func BenchmarkExprCompile(b *testing.B) {
|
|||||||
|
|
||||||
func BenchmarkExprRunCached(b *testing.B) {
|
func BenchmarkExprRunCached(b *testing.B) {
|
||||||
billingexpr.CompileFromCache(benchComplexExpr)
|
billingexpr.CompileFromCache(benchComplexExpr)
|
||||||
params := billingexpr.TokenParams{P: 150000, C: 10000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
|
params := billingexpr.TokenParams{P: 150000, C: 10000, Len: 188000, CR: 30000, CC: 5000, Img: 2000, AI: 1000, AO: 500}
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
billingexpr.RunExpr(benchComplexExpr, params)
|
billingexpr.RunExpr(benchComplexExpr, params)
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ var (
|
|||||||
var compileEnvPrototypeV1 = map[string]interface{}{
|
var compileEnvPrototypeV1 = map[string]interface{}{
|
||||||
"p": float64(0),
|
"p": float64(0),
|
||||||
"c": float64(0),
|
"c": float64(0),
|
||||||
|
"len": float64(0),
|
||||||
"cr": float64(0),
|
"cr": float64(0),
|
||||||
"cc": float64(0),
|
"cc": float64(0),
|
||||||
"cc1h": float64(0),
|
"cc1h": float64(0),
|
||||||
|
|||||||
+16
-3
@@ -30,7 +30,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
|||||||
|
|
||||||
| 变量 | 含义 |
|
| 变量 | 含义 |
|
||||||
|------|------|
|
|------|------|
|
||||||
| `p` | 输入 token 数。**自动排除**表达式中单独计价的子类别(见下方说明) |
|
| `p` | 输入 token 数(**计价用**)。**自动排除**表达式中单独计价的子类别(见下方说明) |
|
||||||
|
| `len` | 输入上下文总长度(**条件判断用**)。不受自动排除影响,始终反映完整输入长度。非 Claude:等于原始 `prompt_tokens`;Claude:等于文本输入 + 缓存读取 + 缓存创建 |
|
||||||
| `cr` | 缓存命中(读取)token 数 |
|
| `cr` | 缓存命中(读取)token 数 |
|
||||||
| `cc` | 缓存创建 token 数(Claude 5分钟 TTL / 通用) |
|
| `cc` | 缓存创建 token 数(Claude 5分钟 TTL / 通用) |
|
||||||
| `cc1h` | 缓存创建 token 数 — 1小时 TTL(Claude 专用) |
|
| `cc1h` | 缓存创建 token 数 — 1小时 TTL(Claude 专用) |
|
||||||
@@ -51,6 +52,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
|||||||
|
|
||||||
**规则:如果表达式使用了某个子类别变量,对应的 token 就从 `p` 或 `c` 中扣除;如果没使用,那些 token 就留在 `p` 或 `c` 里按基础价格计费。**
|
**规则:如果表达式使用了某个子类别变量,对应的 token 就从 `p` 或 `c` 中扣除;如果没使用,那些 token 就留在 `p` 或 `c` 里按基础价格计费。**
|
||||||
|
|
||||||
|
> **重要:`len` 不受自动排除影响。** `len` 始终代表完整的输入上下文长度,不管表达式是否单独对缓存/图片/音频定价。因此**阶梯条件应使用 `len` 而非 `p`**,以避免缓存命中导致 `p` 降低而误判档位。
|
||||||
|
|
||||||
举例说明(假设上游返回的原始数据:prompt_tokens=1000,其中包含 200 cache read、100 image):
|
举例说明(假设上游返回的原始数据:prompt_tokens=1000,其中包含 200 cache read、100 image):
|
||||||
|
|
||||||
| 表达式 | `p` 的值 | 说明 |
|
| 表达式 | `p` 的值 | 说明 |
|
||||||
@@ -93,8 +96,8 @@ Powered by [expr-lang/expr](https://github.com/expr-lang/expr). Expressions are
|
|||||||
# Simple flat pricing
|
# Simple flat pricing
|
||||||
tier("base", p * 2.5 + c * 15 + cr * 0.25)
|
tier("base", p * 2.5 + c * 15 + cr * 0.25)
|
||||||
|
|
||||||
# Multi-tier (Claude Sonnet style)
|
# Multi-tier (Claude Sonnet style) — use len for tier conditions
|
||||||
p <= 200000
|
len <= 200000
|
||||||
? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
|
? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
|
||||||
: tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
|
: tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
|
||||||
|
|
||||||
@@ -199,6 +202,16 @@ Example: `p * 2.5 + c * 15 + cr * 0.25`
|
|||||||
- Expression uses `cr` → cache read tokens subtracted from `p`
|
- Expression uses `cr` → cache read tokens subtracted from `p`
|
||||||
- Expression doesn't use `img` → image tokens stay in `p`, priced at $2.50
|
- Expression doesn't use `img` → image tokens stay in `p`, priced at $2.50
|
||||||
|
|
||||||
|
### `len` — Context Length Variable
|
||||||
|
|
||||||
|
`len` represents the total input context length, designed for **tier condition evaluation** (e.g. `len <= 200000 ? ...`). Unlike `p`, `len` is never reduced by sub-category exclusion.
|
||||||
|
|
||||||
|
**Computation rules:**
|
||||||
|
- **Non-Claude (GPT/OpenAI format)**: `len = prompt_tokens` (the raw total from the upstream response)
|
||||||
|
- **Claude format**: `len = input_tokens + cache_read_tokens + cache_creation_tokens` (since Claude's `input_tokens` is text-only, cache must be added back to reflect full context length)
|
||||||
|
|
||||||
|
This ensures that heavy cache usage doesn't cause the tier condition to incorrectly evaluate to a lower tier. For example, if a request has 300K total context but 250K is cached, `p` with cache subtracted would be only 50K (standard tier), while `len` correctly reports 300K (long-context tier).
|
||||||
|
|
||||||
### Quota Conversion
|
### Quota Conversion
|
||||||
|
|
||||||
Expression coefficients are $/1M tokens. Conversion to internal quota:
|
Expression coefficients are $/1M tokens. Conversion to internal quota:
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ import (
|
|||||||
|
|
||||||
// RunExpr compiles (with cache) and executes an expression string.
|
// RunExpr compiles (with cache) and executes an expression string.
|
||||||
// The environment exposes:
|
// The environment exposes:
|
||||||
// - p, c — prompt / completion tokens
|
// - p, c — prompt / completion tokens (auto-excluding separately-priced sub-categories)
|
||||||
|
// - len — total input context length for tier conditions (never reduced by sub-category exclusion)
|
||||||
// - cr, cc, cc1h — cache read / creation / creation-1h tokens
|
// - cr, cc, cc1h — cache read / creation / creation-1h tokens
|
||||||
// - tier(name, value) — trace callback that records which tier matched
|
// - tier(name, value) — trace callback that records which tier matched
|
||||||
// - max, min, abs, ceil, floor — standard math helpers
|
// - max, min, abs, ceil, floor — standard math helpers
|
||||||
@@ -54,6 +55,7 @@ func runProgram(prog *vm.Program, params TokenParams, request RequestInput) (flo
|
|||||||
env := map[string]interface{}{
|
env := map[string]interface{}{
|
||||||
"p": params.P,
|
"p": params.P,
|
||||||
"c": params.C,
|
"c": params.C,
|
||||||
|
"len": params.Len,
|
||||||
"cr": params.CR,
|
"cr": params.CR,
|
||||||
"cc": params.CC,
|
"cc": params.CC,
|
||||||
"cc1h": params.CC1h,
|
"cc1h": params.CC1h,
|
||||||
|
|||||||
@@ -14,8 +14,9 @@ type RequestInput struct {
|
|||||||
// Fields beyond P and C are optional — when absent they default to 0,
|
// Fields beyond P and C are optional — when absent they default to 0,
|
||||||
// which means cache-unaware expressions keep working unchanged.
|
// which means cache-unaware expressions keep working unchanged.
|
||||||
type TokenParams struct {
|
type TokenParams struct {
|
||||||
P float64 // prompt tokens (text)
|
P float64 // prompt tokens (text) — auto-excludes sub-categories priced separately
|
||||||
C float64 // completion tokens (text)
|
C float64 // completion tokens (text) — auto-excludes sub-categories priced separately
|
||||||
|
Len float64 // total input context length for tier conditions (non-Claude: raw prompt_tokens; Claude: text + cache read + cache creation)
|
||||||
CR float64 // cache read (hit) tokens
|
CR float64 // cache read (hit) tokens
|
||||||
CC float64 // cache creation tokens (5-min TTL for Claude, generic for others)
|
CC float64 // cache creation tokens (5-min TTL for Claude, generic for others)
|
||||||
CC1h float64 // cache creation tokens — 1-hour TTL (Claude only)
|
CC1h float64 // cache creation tokens — 1-hour TTL (Claude only)
|
||||||
|
|||||||
@@ -255,8 +255,9 @@ func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptT
|
|||||||
}
|
}
|
||||||
|
|
||||||
rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
|
rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{
|
||||||
P: float64(promptTokens),
|
P: float64(promptTokens),
|
||||||
C: float64(estimatedCompletionTokens),
|
C: float64(estimatedCompletionTokens),
|
||||||
|
Len: float64(promptTokens),
|
||||||
}, requestInput)
|
}, requestInput)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err)
|
return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err)
|
||||||
|
|||||||
+3
-2
@@ -160,8 +160,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
|||||||
|
|
||||||
var tieredResult *billingexpr.TieredResult
|
var tieredResult *billingexpr.TieredResult
|
||||||
tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, billingexpr.TokenParams{
|
tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, billingexpr.TokenParams{
|
||||||
P: float64(usage.InputTokens),
|
P: float64(usage.InputTokens),
|
||||||
C: float64(usage.OutputTokens),
|
C: float64(usage.OutputTokens),
|
||||||
|
Len: float64(usage.InputTokens),
|
||||||
})
|
})
|
||||||
if tieredOk {
|
if tieredOk {
|
||||||
tieredResult = tieredRes
|
tieredResult = tieredRes
|
||||||
|
|||||||
@@ -35,6 +35,14 @@ func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVa
|
|||||||
imgO := float64(usage.CompletionTokenDetails.ImageTokens)
|
imgO := float64(usage.CompletionTokenDetails.ImageTokens)
|
||||||
ao := float64(usage.CompletionTokenDetails.AudioTokens)
|
ao := float64(usage.CompletionTokenDetails.AudioTokens)
|
||||||
|
|
||||||
|
// len = total input context length for tier condition evaluation.
|
||||||
|
// Non-Claude: prompt_tokens already includes everything.
|
||||||
|
// Claude: input_tokens is text-only, so add cache read + cache creation.
|
||||||
|
inputLen := p
|
||||||
|
if isClaudeUsageSemantic {
|
||||||
|
inputLen = p + cr + cc5m + cc1h
|
||||||
|
}
|
||||||
|
|
||||||
if !isClaudeUsageSemantic {
|
if !isClaudeUsageSemantic {
|
||||||
if usedVars["cr"] {
|
if usedVars["cr"] {
|
||||||
p -= cr
|
p -= cr
|
||||||
@@ -69,6 +77,7 @@ func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVa
|
|||||||
return billingexpr.TokenParams{
|
return billingexpr.TokenParams{
|
||||||
P: p,
|
P: p,
|
||||||
C: c,
|
C: c,
|
||||||
|
Len: inputLen,
|
||||||
CR: cr,
|
CR: cr,
|
||||||
CC: cc5m,
|
CC: cc5m,
|
||||||
CC1h: cc1h,
|
CC1h: cc1h,
|
||||||
|
|||||||
@@ -604,6 +604,97 @@ func TestBuildTieredTokenParams_ParityWithRatio_Image(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// BuildTieredTokenParams: Len computation tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestBuildTieredTokenParams_Len_GPT(t *testing.T) {
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 10000,
|
||||||
|
CompletionTokens: 2000,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 3000,
|
||||||
|
TextTokens: 7000,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
|
||||||
|
usedVars := billingexpr.UsedVars(expr)
|
||||||
|
params := BuildTieredTokenParams(usage, false, usedVars)
|
||||||
|
|
||||||
|
// Non-Claude: Len = raw PromptTokens
|
||||||
|
if params.Len != 10000 {
|
||||||
|
t.Fatalf("Len = %f, want 10000 (raw PromptTokens)", params.Len)
|
||||||
|
}
|
||||||
|
// P should be reduced by cache
|
||||||
|
if params.P != 7000 {
|
||||||
|
t.Fatalf("P = %f, want 7000 (PromptTokens - CachedTokens)", params.P)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildTieredTokenParams_Len_Claude(t *testing.T) {
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 5000,
|
||||||
|
CompletionTokens: 2000,
|
||||||
|
UsageSemantic: "anthropic",
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 3000,
|
||||||
|
TextTokens: 5000,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: 1000,
|
||||||
|
ClaudeCacheCreation1hTokens: 500,
|
||||||
|
}
|
||||||
|
expr := `tier("base", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)`
|
||||||
|
usedVars := billingexpr.UsedVars(expr)
|
||||||
|
params := BuildTieredTokenParams(usage, true, usedVars)
|
||||||
|
|
||||||
|
// Claude: Len = PromptTokens + CachedTokens + CacheCreation5m + CacheCreation1h
|
||||||
|
wantLen := float64(5000 + 3000 + 1000 + 500)
|
||||||
|
if params.Len != wantLen {
|
||||||
|
t.Fatalf("Len = %f, want %f (text + cache read + cache creation)", params.Len, wantLen)
|
||||||
|
}
|
||||||
|
// Claude: P is not reduced (isClaudeUsageSemantic = true)
|
||||||
|
if params.P != 5000 {
|
||||||
|
t.Fatalf("P = %f, want 5000 (no subtraction for Claude)", params.P)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildTieredTokenParams_Len_TierCondition(t *testing.T) {
|
||||||
|
// Test that len-based tier conditions work correctly when p is reduced by cache
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 300000,
|
||||||
|
CompletionTokens: 5000,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 250000,
|
||||||
|
TextTokens: 50000,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
expr := `len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6)`
|
||||||
|
usedVars := billingexpr.UsedVars(expr)
|
||||||
|
params := BuildTieredTokenParams(usage, false, usedVars)
|
||||||
|
|
||||||
|
// Len = 300000 (raw prompt), P = 50000 (300000 - 250000 cache)
|
||||||
|
if params.Len != 300000 {
|
||||||
|
t.Fatalf("Len = %f, want 300000", params.Len)
|
||||||
|
}
|
||||||
|
if params.P != 50000 {
|
||||||
|
t.Fatalf("P = %f, want 50000", params.P)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run expression: len=300000 > 200000, so long_context tier
|
||||||
|
cost, trace, err := billingexpr.RunExpr(expr, params)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if trace.MatchedTier != "long_context" {
|
||||||
|
t.Fatalf("tier = %s, want long_context (len=300000 but p=50000)", trace.MatchedTier)
|
||||||
|
}
|
||||||
|
// long_context: 50000*6 + 5000*22.5 + 250000*0.6
|
||||||
|
wantCost := 50000.0*6 + 5000*22.5 + 250000*0.6
|
||||||
|
if math.Abs(cost-wantCost) > 1e-6 {
|
||||||
|
t.Fatalf("cost = %f, want %f", cost, wantCost)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Stress test: 1000 concurrent goroutines, complex tiered expr vs ratio,
|
// Stress test: 1000 concurrent goroutines, complex tiered expr vs ratio,
|
||||||
// random token counts, verify correctness and measure performance
|
// random token counts, verify correctness and measure performance
|
||||||
|
|||||||
@@ -54,10 +54,10 @@ func SmokeTestExpr(exprStr string) error {
|
|||||||
|
|
||||||
func smokeTestExpr(exprStr string) error {
|
func smokeTestExpr(exprStr string) error {
|
||||||
vectors := []billingexpr.TokenParams{
|
vectors := []billingexpr.TokenParams{
|
||||||
{P: 0, C: 0},
|
{P: 0, C: 0, Len: 0},
|
||||||
{P: 1000, C: 1000},
|
{P: 1000, C: 1000, Len: 1000},
|
||||||
{P: 100000, C: 100000},
|
{P: 100000, C: 100000, Len: 100000},
|
||||||
{P: 1000000, C: 1000000},
|
{P: 1000000, C: 1000000, Len: 1000000},
|
||||||
}
|
}
|
||||||
requests := []billingexpr.RequestInput{
|
requests := []billingexpr.RequestInput{
|
||||||
{},
|
{},
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import React from 'react';
|
|||||||
import { Avatar, Tag, Table, Typography } from '@douyinfe/semi-ui';
|
import { Avatar, Tag, Table, Typography } from '@douyinfe/semi-ui';
|
||||||
import { IconPriceTag } from '@douyinfe/semi-icons';
|
import { IconPriceTag } from '@douyinfe/semi-icons';
|
||||||
import { parseTiersFromExpr, getCurrencyConfig } from '../../../../../helpers';
|
import { parseTiersFromExpr, getCurrencyConfig } from '../../../../../helpers';
|
||||||
import { BILLING_VARS } from '../../../../../constants';
|
import { BILLING_PRICING_VARS } from '../../../../../constants';
|
||||||
import {
|
import {
|
||||||
splitBillingExprAndRequestRules,
|
splitBillingExprAndRequestRules,
|
||||||
tryParseRequestRuleExpr,
|
tryParseRequestRuleExpr,
|
||||||
@@ -113,7 +113,7 @@ export default function DynamicPricingBreakdown({ billingExpr, t }) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const priceFields = BILLING_VARS.map((v) => [v.field, v.shortLabel]);
|
const priceFields = BILLING_PRICING_VARS.map((v) => [v.field, v.shortLabel]);
|
||||||
|
|
||||||
const tierColumns = [
|
const tierColumns = [
|
||||||
{
|
{
|
||||||
|
|||||||
+12
-5
@@ -13,6 +13,7 @@
|
|||||||
export const BILLING_VARS = [
|
export const BILLING_VARS = [
|
||||||
{ key: 'p', field: 'inputPrice', tierField: 'input_unit_cost', label: '输入价格', shortLabel: '输入', side: 'input', isBase: true },
|
{ key: 'p', field: 'inputPrice', tierField: 'input_unit_cost', label: '输入价格', shortLabel: '输入', side: 'input', isBase: true },
|
||||||
{ key: 'c', field: 'outputPrice', tierField: 'output_unit_cost', label: '补全价格', shortLabel: '补全', side: 'output', isBase: true },
|
{ key: 'c', field: 'outputPrice', tierField: 'output_unit_cost', label: '补全价格', shortLabel: '补全', side: 'output', isBase: true },
|
||||||
|
{ key: 'len', field: null, tierField: null, label: '输入长度', shortLabel: '长度', side: 'condition', isConditionOnly: true },
|
||||||
{ key: 'cr', field: 'cacheReadPrice', tierField: 'cache_read_unit_cost', label: '缓存读取价格', shortLabel: '缓存读', side: 'input', group: 'cache' },
|
{ key: 'cr', field: 'cacheReadPrice', tierField: 'cache_read_unit_cost', label: '缓存读取价格', shortLabel: '缓存读', side: 'input', group: 'cache' },
|
||||||
{ key: 'cc', field: 'cacheCreatePrice', tierField: 'cache_create_unit_cost', label: '缓存创建价格', shortLabel: '缓存创建', side: 'input', group: 'cache' },
|
{ key: 'cc', field: 'cacheCreatePrice', tierField: 'cache_create_unit_cost', label: '缓存创建价格', shortLabel: '缓存创建', side: 'input', group: 'cache' },
|
||||||
{ key: 'cc1h', field: 'cacheCreate1hPrice', tierField: 'cache_create_1h_unit_cost', label: '1h缓存创建价格', shortLabel: '1h缓存创建', side: 'input', group: 'cache' },
|
{ key: 'cc1h', field: 'cacheCreate1hPrice', tierField: 'cache_create_1h_unit_cost', label: '1h缓存创建价格', shortLabel: '1h缓存创建', side: 'input', group: 'cache' },
|
||||||
@@ -24,18 +25,20 @@ export const BILLING_VARS = [
|
|||||||
|
|
||||||
export const BILLING_VAR_KEYS = BILLING_VARS.map((v) => v.key);
|
export const BILLING_VAR_KEYS = BILLING_VARS.map((v) => v.key);
|
||||||
|
|
||||||
export const BILLING_EXTRA_VARS = BILLING_VARS.filter((v) => !v.isBase);
|
export const BILLING_PRICING_VARS = BILLING_VARS.filter((v) => !v.isConditionOnly);
|
||||||
|
|
||||||
|
export const BILLING_EXTRA_VARS = BILLING_VARS.filter((v) => !v.isBase && !v.isConditionOnly);
|
||||||
|
|
||||||
export const BILLING_VAR_KEY_TO_FIELD = Object.fromEntries(
|
export const BILLING_VAR_KEY_TO_FIELD = Object.fromEntries(
|
||||||
BILLING_VARS.map((v) => [v.key, v.field]),
|
BILLING_PRICING_VARS.map((v) => [v.key, v.field]),
|
||||||
);
|
);
|
||||||
|
|
||||||
export const BILLING_VAR_FIELD_TO_LABEL = Object.fromEntries(
|
export const BILLING_VAR_FIELD_TO_LABEL = Object.fromEntries(
|
||||||
BILLING_VARS.map((v) => [v.field, v.label]),
|
BILLING_PRICING_VARS.map((v) => [v.field, v.label]),
|
||||||
);
|
);
|
||||||
|
|
||||||
export const BILLING_VAR_FIELD_TO_SHORT_LABEL = Object.fromEntries(
|
export const BILLING_VAR_FIELD_TO_SHORT_LABEL = Object.fromEntries(
|
||||||
BILLING_VARS.map((v) => [v.field, v.shortLabel]),
|
BILLING_PRICING_VARS.map((v) => [v.field, v.shortLabel]),
|
||||||
);
|
);
|
||||||
|
|
||||||
export const BILLING_CACHE_VAR_MAP = BILLING_EXTRA_VARS.map((v) => ({
|
export const BILLING_CACHE_VAR_MAP = BILLING_EXTRA_VARS.map((v) => ({
|
||||||
@@ -44,6 +47,10 @@ export const BILLING_CACHE_VAR_MAP = BILLING_EXTRA_VARS.map((v) => ({
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
export const BILLING_VAR_REGEX = new RegExp(
|
export const BILLING_VAR_REGEX = new RegExp(
|
||||||
`\\b(${BILLING_VAR_KEYS.join('|')})\\s*\\*\\s*([\\d.eE+-]+)`,
|
`\\b(${BILLING_PRICING_VARS.map((v) => v.key).join('|')})\\s*\\*\\s*([\\d.eE+-]+)`,
|
||||||
'g',
|
'g',
|
||||||
);
|
);
|
||||||
|
|
||||||
|
export const BILLING_CONDITION_VARS = BILLING_VARS.filter(
|
||||||
|
(v) => v.isBase || v.isConditionOnly,
|
||||||
|
).map((v) => v.key);
|
||||||
|
|||||||
Vendored
+5
-5
@@ -22,7 +22,7 @@ import { Modal, Tag, Typography, Avatar } from '@douyinfe/semi-ui';
|
|||||||
import { copy, showSuccess } from './utils';
|
import { copy, showSuccess } from './utils';
|
||||||
import { MOBILE_BREAKPOINT } from '../hooks/common/useIsMobile';
|
import { MOBILE_BREAKPOINT } from '../hooks/common/useIsMobile';
|
||||||
import {
|
import {
|
||||||
BILLING_VARS,
|
BILLING_PRICING_VARS,
|
||||||
BILLING_VAR_KEY_TO_FIELD,
|
BILLING_VAR_KEY_TO_FIELD,
|
||||||
BILLING_VAR_REGEX,
|
BILLING_VAR_REGEX,
|
||||||
} from '../constants';
|
} from '../constants';
|
||||||
@@ -2246,7 +2246,7 @@ export function parseTiersFromExpr(exprStr) {
|
|||||||
if (!exprStr) return [];
|
if (!exprStr) return [];
|
||||||
try {
|
try {
|
||||||
const { body } = stripExprVersion(exprStr);
|
const { body } = stripExprVersion(exprStr);
|
||||||
const condGroup = `((?:(?:p|c)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)(?:\\s*&&\\s*(?:p|c)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)*)`;
|
const condGroup = `((?:(?:p|c|len)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)(?:\\s*&&\\s*(?:p|c|len)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)*)`;
|
||||||
const tierRe = new RegExp(`(?:${condGroup}\\s*\\?\\s*)?tier\\("([^"]*)",\\s*([^)]+)\\)`, 'g');
|
const tierRe = new RegExp(`(?:${condGroup}\\s*\\?\\s*)?tier\\("([^"]*)",\\s*([^)]+)\\)`, 'g');
|
||||||
const tiers = [];
|
const tiers = [];
|
||||||
let m;
|
let m;
|
||||||
@@ -2255,7 +2255,7 @@ export function parseTiersFromExpr(exprStr) {
|
|||||||
const conditions = [];
|
const conditions = [];
|
||||||
if (condStr) {
|
if (condStr) {
|
||||||
for (const cp of condStr.split(/\s*&&\s*/)) {
|
for (const cp of condStr.split(/\s*&&\s*/)) {
|
||||||
const cm = cp.trim().match(/^(p|c)\s*(<|<=|>|>=)\s*([\d.eE+]+)$/);
|
const cm = cp.trim().match(/^(p|c|len)\s*(<|<=|>|>=)\s*([\d.eE+]+)$/);
|
||||||
if (cm) conditions.push({ var: cm[1], op: cm[2], value: Number(cm[3]) });
|
if (cm) conditions.push({ var: cm[1], op: cm[2], value: Number(cm[3]) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2293,7 +2293,7 @@ export function renderTieredModelPrice(opts) {
|
|||||||
const { symbol, rate } = getCurrencyConfig();
|
const { symbol, rate } = getCurrencyConfig();
|
||||||
const gr = groupRatio || 1;
|
const gr = groupRatio || 1;
|
||||||
|
|
||||||
const priceLines = BILLING_VARS.map((v) => [v.field, v.label]);
|
const priceLines = BILLING_PRICING_VARS.map((v) => [v.field, v.label]);
|
||||||
|
|
||||||
const lines = [
|
const lines = [
|
||||||
buildBillingText('命中档位:{{tier}}', { tier: matchedTier || tier.label }),
|
buildBillingText('命中档位:{{tier}}', { tier: matchedTier || tier.label }),
|
||||||
@@ -2334,7 +2334,7 @@ export function renderTieredModelPriceSimple(opts) {
|
|||||||
];
|
];
|
||||||
|
|
||||||
if (tier && isPriceDisplayMode(displayMode)) {
|
if (tier && isPriceDisplayMode(displayMode)) {
|
||||||
const priceSegments = BILLING_VARS.map((v) => [v.field, v.shortLabel]);
|
const priceSegments = BILLING_PRICING_VARS.map((v) => [v.field, v.shortLabel]);
|
||||||
for (const [field, label] of priceSegments) {
|
for (const [field, label] of priceSegments) {
|
||||||
if (tier[field] > 0) {
|
if (tier[field] > 0) {
|
||||||
segments.push({
|
segments.push({
|
||||||
|
|||||||
Vendored
+2
-2
@@ -18,7 +18,7 @@ For commercial licensing, please contact support@quantumnous.com
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { Toast, Pagination } from '@douyinfe/semi-ui';
|
import { Toast, Pagination } from '@douyinfe/semi-ui';
|
||||||
import { toastConstants, BILLING_VARS, BILLING_VAR_REGEX } from '../constants';
|
import { toastConstants, BILLING_PRICING_VARS, BILLING_VAR_REGEX } from '../constants';
|
||||||
import React from 'react';
|
import React from 'react';
|
||||||
import { toast } from 'react-toastify';
|
import { toast } from 'react-toastify';
|
||||||
import {
|
import {
|
||||||
@@ -927,7 +927,7 @@ export const formatDynamicPriceSummary = (billingExpr, t, groupRatio = 1) => {
|
|||||||
}
|
}
|
||||||
const hasCoeffs = 'p' in varCoeffs || 'c' in varCoeffs;
|
const hasCoeffs = 'p' in varCoeffs || 'c' in varCoeffs;
|
||||||
|
|
||||||
const varLabels = BILLING_VARS.map((v) => [v.key, v.label]);
|
const varLabels = BILLING_PRICING_VARS.map((v) => [v.key, v.label]);
|
||||||
|
|
||||||
const hasTimeCondition = /\b(?:hour|minute|weekday|month|day)\(/.test(exprBody);
|
const hasTimeCondition = /\b(?:hour|minute|weekday|month|day)\(/.test(exprBody);
|
||||||
const hasRequestCondition = /\b(?:param|header)\(/.test(exprBody);
|
const hasRequestCondition = /\b(?:param|header)\(/.test(exprBody);
|
||||||
|
|||||||
@@ -31,9 +31,10 @@ import {
|
|||||||
TextArea,
|
TextArea,
|
||||||
Typography,
|
Typography,
|
||||||
} from '@douyinfe/semi-ui';
|
} from '@douyinfe/semi-ui';
|
||||||
import { IconDelete, IconPlus } from '@douyinfe/semi-icons';
|
import { IconCopy, IconDelete, IconPlus } from '@douyinfe/semi-icons';
|
||||||
import { renderQuota } from '../../../../helpers/render';
|
import { renderQuota } from '../../../../helpers/render';
|
||||||
import { BILLING_EXTRA_VARS, BILLING_CACHE_VAR_MAP } from '../../../../constants';
|
import { copy, showSuccess } from '../../../../helpers';
|
||||||
|
import { BILLING_EXTRA_VARS, BILLING_CACHE_VAR_MAP, BILLING_CONDITION_VARS } from '../../../../constants';
|
||||||
import {
|
import {
|
||||||
createEmptyCondition,
|
createEmptyCondition,
|
||||||
createEmptyTimeCondition,
|
createEmptyTimeCondition,
|
||||||
@@ -70,6 +71,7 @@ function priceToUnitCost(price) {
|
|||||||
|
|
||||||
const OPS = ['<', '<=', '>', '>='];
|
const OPS = ['<', '<=', '>', '>='];
|
||||||
const VAR_OPTIONS = [
|
const VAR_OPTIONS = [
|
||||||
|
{ value: 'len', label: 'len (长度)' },
|
||||||
{ value: 'p', label: 'p (输入)' },
|
{ value: 'p', label: 'p (输入)' },
|
||||||
{ value: 'c', label: 'c (输出)' },
|
{ value: 'c', label: 'c (输出)' },
|
||||||
];
|
];
|
||||||
@@ -224,7 +226,7 @@ function tryParseVisualConfig(exprStr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Multi-tier: cond1 ? tier(body) : cond2 ? tier(body) : tier(body)
|
// Multi-tier: cond1 ? tier(body) : cond2 ? tier(body) : tier(body)
|
||||||
const condGroup = `((?:(?:p|c)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)(?:\\s*&&\\s*(?:p|c)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)*)`;
|
const condGroup = `((?:(?:p|c|len)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)(?:\\s*&&\\s*(?:p|c|len)\\s*(?:<|<=|>|>=)\\s*[\\d.eE+]+)*)`;
|
||||||
const tierRe = new RegExp(
|
const tierRe = new RegExp(
|
||||||
`(?:${condGroup}\\s*\\?\\s*)?tier\\("([^"]*)",\\s*${bodyPat}\\)`,
|
`(?:${condGroup}\\s*\\?\\s*)?tier\\("([^"]*)",\\s*${bodyPat}\\)`,
|
||||||
'g',
|
'g',
|
||||||
@@ -237,7 +239,7 @@ function tryParseVisualConfig(exprStr) {
|
|||||||
if (condStr) {
|
if (condStr) {
|
||||||
const condParts = condStr.split(/\s*&&\s*/);
|
const condParts = condStr.split(/\s*&&\s*/);
|
||||||
for (const cp of condParts) {
|
for (const cp of condParts) {
|
||||||
const cm = cp.trim().match(/^(p|c)\s*(<|<=|>|>=)\s*([\d.eE+]+)$/);
|
const cm = cp.trim().match(/^(p|c|len)\s*(<|<=|>|>=)\s*([\d.eE+]+)$/);
|
||||||
if (cm) {
|
if (cm) {
|
||||||
conditions.push({ var: cm[1], op: cm[2], value: Number(cm[3]) });
|
conditions.push({ var: cm[1], op: cm[2], value: Number(cm[3]) });
|
||||||
}
|
}
|
||||||
@@ -283,7 +285,7 @@ function ConditionRow({ cond, onChange, onRemove, t }) {
|
|||||||
}}>
|
}}>
|
||||||
<Select
|
<Select
|
||||||
size='small'
|
size='small'
|
||||||
value={cond.var || 'p'}
|
value={cond.var || 'len'}
|
||||||
onChange={(val) => onChange({ ...cond, var: val })}
|
onChange={(val) => onChange({ ...cond, var: val })}
|
||||||
>
|
>
|
||||||
{VAR_OPTIONS.map((v) => (
|
{VAR_OPTIONS.map((v) => (
|
||||||
@@ -500,7 +502,7 @@ function ExtendedPriceBlock({ tier, index, onUpdate, t }) {
|
|||||||
function VisualTierCard({ tier, index, isLast, isOnly, onUpdate, onRemove, t }) {
|
function VisualTierCard({ tier, index, isLast, isOnly, onUpdate, onRemove, t }) {
|
||||||
const conditions = tier.conditions || [];
|
const conditions = tier.conditions || [];
|
||||||
|
|
||||||
const varLabel = { p: t('输入'), c: t('输出') };
|
const varLabel = { len: t('长度'), p: t('输入'), c: t('输出') };
|
||||||
const condSummary = useMemo(() => {
|
const condSummary = useMemo(() => {
|
||||||
if (conditions.length === 0) return t('无条件(兜底档)');
|
if (conditions.length === 0) return t('无条件(兜底档)');
|
||||||
return conditions
|
return conditions
|
||||||
@@ -525,7 +527,7 @@ function VisualTierCard({ tier, index, isLast, isOnly, onUpdate, onRemove, t })
|
|||||||
const addCondition = () => {
|
const addCondition = () => {
|
||||||
if (conditions.length >= 2) return;
|
if (conditions.length >= 2) return;
|
||||||
const usedVars = conditions.map((c) => c.var);
|
const usedVars = conditions.map((c) => c.var);
|
||||||
const nextVar = usedVars.includes('p') ? 'c' : 'p';
|
const nextVar = usedVars.includes('len') ? 'c' : 'len';
|
||||||
onUpdate(index, 'conditions', [
|
onUpdate(index, 'conditions', [
|
||||||
...conditions,
|
...conditions,
|
||||||
{ var: nextVar, op: '<', value: 200000 },
|
{ var: nextVar, op: '<', value: 200000 },
|
||||||
@@ -694,7 +696,7 @@ function VisualEditor({ visualConfig, onChange, t }) {
|
|||||||
) {
|
) {
|
||||||
newTiers[newTiers.length - 1] = {
|
newTiers[newTiers.length - 1] = {
|
||||||
...newTiers[newTiers.length - 1],
|
...newTiers[newTiers.length - 1],
|
||||||
conditions: [{ var: 'p', op: '<', value: 200000 }],
|
conditions: [{ var: 'len', op: '<', value: 200000 }],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
newTiers.push({
|
newTiers.push({
|
||||||
@@ -723,7 +725,7 @@ function VisualEditor({ visualConfig, onChange, t }) {
|
|||||||
<div>
|
<div>
|
||||||
<Banner
|
<Banner
|
||||||
type='info'
|
type='info'
|
||||||
description={t('每个档位可设置 0~2 个条件(对 p 和 c),最后一档为兜底档无需条件。')}
|
description={t('每个档位可设置 0~2 个条件(对 len、p 和 c),最后一档为兜底档无需条件。len 为输入上下文总长度(含缓存),推荐用于阶梯条件。')}
|
||||||
style={{ marginBottom: 12 }}
|
style={{ marginBottom: 12 }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
@@ -762,16 +764,16 @@ const PRESET_GROUPS = [
|
|||||||
presets: [
|
presets: [
|
||||||
{ key: 'flat', label: 'Flat', expr: 'tier("base", p * 2 + c * 4)' },
|
{ key: 'flat', label: 'Flat', expr: 'tier("base", p * 2 + c * 4)' },
|
||||||
{ key: 'claude-opus', label: 'Claude Opus 4.6', expr: 'tier("base", p * 5 + c * 25 + cr * 0.5 + cc * 6.25 + cc1h * 10)' },
|
{ key: 'claude-opus', label: 'Claude Opus 4.6', expr: 'tier("base", p * 5 + c * 25 + cr * 0.5 + cc * 6.25 + cc1h * 10)' },
|
||||||
{ key: 'gpt-5.4', label: 'GPT-5.4', expr: 'p <= 272000 ? tier("standard", p * 2.5 + c * 15 + cr * 0.25) : tier("long_context", p * 5 + c * 22.5 + cr * 0.5)' },
|
{ key: 'gpt-5.4', label: 'GPT-5.4', expr: 'len <= 272000 ? tier("standard", p * 2.5 + c * 15 + cr * 0.25) : tier("long_context", p * 5 + c * 22.5 + cr * 0.5)' },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
group: '阶梯计费',
|
group: '阶梯计费',
|
||||||
presets: [
|
presets: [
|
||||||
{ key: 'claude-sonnet', label: 'Claude Sonnet 4.5', expr: 'p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)' },
|
{ key: 'claude-sonnet', label: 'Claude Sonnet 4.5', expr: 'len <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)' },
|
||||||
{ key: 'qwen3-max', label: 'Qwen3 Max', expr: 'p <= 32000 ? tier("short", p * 1.2 + c * 6 + cr * 0.24 + cc * 1.5) : p <= 128000 ? tier("mid", p * 2.4 + c * 12 + cr * 0.48 + cc * 3) : tier("long", p * 3 + c * 15 + cr * 0.6 + cc * 3.75)' },
|
{ key: 'qwen3-max', label: 'Qwen3 Max', expr: 'len <= 32000 ? tier("short", p * 1.2 + c * 6 + cr * 0.24 + cc * 1.5) : len <= 128000 ? tier("mid", p * 2.4 + c * 12 + cr * 0.48 + cc * 3) : tier("long", p * 3 + c * 15 + cr * 0.6 + cc * 3.75)' },
|
||||||
{ key: 'glm-4.5-air', label: 'GLM-4.5 Air', expr: 'p < 32000 && c < 200 ? tier("short_output", p * 0.8 + c * 2 + cr * 0.16) : p < 32000 && c >= 200 ? tier("long_output", p * 0.8 + c * 6 + cr * 0.16) : tier("mid_context", p * 1.2 + c * 8 + cr * 0.24)' },
|
{ key: 'glm-4.5-air', label: 'GLM-4.5 Air', expr: 'len < 32000 && c < 200 ? tier("short_output", p * 0.8 + c * 2 + cr * 0.16) : len < 32000 && c >= 200 ? tier("long_output", p * 0.8 + c * 6 + cr * 0.16) : tier("mid_context", p * 1.2 + c * 8 + cr * 0.24)' },
|
||||||
{ key: 'doubao-seed-1.8', label: 'Doubao Seed 1.8', expr: 'p <= 32000 && c <= 200 ? tier("discount", p * 0.8 + c * 2 + cr * 0.16 + cc * 0.17) : p <= 32000 ? tier("short", p * 0.8 + c * 8 + cr * 0.16 + cc * 0.17) : p <= 128000 ? tier("mid", p * 1.2 + c * 16 + cr * 0.16 + cc * 0.17) : tier("long", p * 2.4 + c * 24 + cr * 0.16 + cc * 0.17)' },
|
{ key: 'doubao-seed-1.8', label: 'Doubao Seed 1.8', expr: 'len <= 32000 && c <= 200 ? tier("discount", p * 0.8 + c * 2 + cr * 0.16 + cc * 0.17) : len <= 32000 ? tier("short", p * 0.8 + c * 8 + cr * 0.16 + cc * 0.17) : len <= 128000 ? tier("mid", p * 1.2 + c * 16 + cr * 0.16 + cc * 0.17) : tier("long", p * 2.4 + c * 24 + cr * 0.16 + cc * 0.17)' },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -793,7 +795,7 @@ const PRESET_GROUPS = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
key: 'gpt-5.4-tiers', label: 'GPT-5.4 Priority/Flex',
|
key: 'gpt-5.4-tiers', label: 'GPT-5.4 Priority/Flex',
|
||||||
expr: 'p <= 272000 ? tier("standard", p * 2.5 + c * 15 + cr * 0.25) : tier("long_context", p * 5 + c * 22.5 + cr * 0.5)',
|
expr: 'len <= 272000 ? tier("standard", p * 2.5 + c * 15 + cr * 0.25) : tier("long_context", p * 5 + c * 22.5 + cr * 0.5)',
|
||||||
requestRules: [
|
requestRules: [
|
||||||
{ conditions: [{ source: SOURCE_PARAM, path: 'service_tier', mode: MATCH_EQ, value: 'priority' }], multiplier: '2' },
|
{ conditions: [{ source: SOURCE_PARAM, path: 'service_tier', mode: MATCH_EQ, value: 'priority' }], multiplier: '2' },
|
||||||
{ conditions: [{ source: SOURCE_PARAM, path: 'service_tier', mode: MATCH_EQ, value: 'flex' }], multiplier: '0.5' },
|
{ conditions: [{ source: SOURCE_PARAM, path: 'service_tier', mode: MATCH_EQ, value: 'flex' }], multiplier: '0.5' },
|
||||||
@@ -880,7 +882,8 @@ function RawExprEditor({ exprString, onChange, t }) {
|
|||||||
<div>
|
<div>
|
||||||
<div>
|
<div>
|
||||||
{t('变量')}: <code>p</code> ({t('输入 Token')}), <code>c</code> (
|
{t('变量')}: <code>p</code> ({t('输入 Token')}), <code>c</code> (
|
||||||
{t('输出 Token')}), <code>cr</code> ({t('缓存读取')}),{' '}
|
{t('输出 Token')}), <code>len</code> ({t('输入长度')}),{' '}
|
||||||
|
<code>cr</code> ({t('缓存读取')}),{' '}
|
||||||
<code>cc</code> ({t('缓存创建')}),{' '}
|
<code>cc</code> ({t('缓存创建')}),{' '}
|
||||||
<code>cc1h</code> ({t('缓存创建-1小时')})
|
<code>cc1h</code> ({t('缓存创建-1小时')})
|
||||||
</div>
|
</div>
|
||||||
@@ -968,7 +971,11 @@ function evalExprLocally(exprStr, p, c, extraTokenValues) {
|
|||||||
matchedTier = name;
|
matchedTier = name;
|
||||||
return value;
|
return value;
|
||||||
};
|
};
|
||||||
const env = { p, c, tier: tierFn, max: Math.max, min: Math.min, abs: Math.abs, ceil: Math.ceil, floor: Math.floor };
|
const cacheReadTokens = extraTokenValues.cacheReadTokens || 0;
|
||||||
|
const cacheCreateTokens = extraTokenValues.cacheCreateTokens || 0;
|
||||||
|
const cacheCreate1hTokens = extraTokenValues.cacheCreate1hTokens || 0;
|
||||||
|
const len = p + cacheReadTokens + cacheCreateTokens + cacheCreate1hTokens;
|
||||||
|
const env = { p, c, len, tier: tierFn, max: Math.max, min: Math.min, abs: Math.abs, ceil: Math.ceil, floor: Math.floor };
|
||||||
for (const field of EXTRA_ESTIMATOR_FIELDS) {
|
for (const field of EXTRA_ESTIMATOR_FIELDS) {
|
||||||
env[field.var] = extraTokenValues[field.stateKey] || 0;
|
env[field.var] = extraTokenValues[field.stateKey] || 0;
|
||||||
}
|
}
|
||||||
@@ -1220,6 +1227,146 @@ function RuleGroupCard({ group, index, onChange, onRemove, t }) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// LLM prompt helper — copyable prompt for LLM-assisted expression design
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
const LLM_PROMPT_TEMPLATE = `你是一个 AI API 计费表达式设计助手。用户需要你帮忙设计一个计费表达式(billing expression),用于 AI API 网关的模型计费。
|
||||||
|
|
||||||
|
## 表达式语言
|
||||||
|
|
||||||
|
表达式基于 expr-lang/expr,支持标准算术运算和三元运算符。
|
||||||
|
|
||||||
|
### Token 变量
|
||||||
|
|
||||||
|
输入侧:
|
||||||
|
- p — 输入 token 数(计价用)。系统会自动排除表达式中单独计价的子类别(如用了 cr,缓存 token 就从 p 中扣除)
|
||||||
|
- len — 输入上下文总长度(条件判断用)。不受自动排除影响,始终反映完整输入长度。用于阶梯条件判断
|
||||||
|
- cr — 缓存命中(读取)token 数
|
||||||
|
- cc — 缓存创建 token 数(5分钟 TTL)
|
||||||
|
- cc1h — 缓存创建 token 数(1小时 TTL,Claude 专用)
|
||||||
|
- img — 图片输入 token 数
|
||||||
|
- ai — 音频输入 token 数
|
||||||
|
|
||||||
|
输出侧:
|
||||||
|
- c — 输出 token 数。同样会自动排除单独计价的子类别
|
||||||
|
- img_o — 图片输出 token 数
|
||||||
|
- ao — 音频输出 token 数
|
||||||
|
|
||||||
|
### p/c 自动排除机制
|
||||||
|
|
||||||
|
p 和 c 是兜底变量,代表所有没有被表达式单独定价的 token。如果表达式使用了某个子类别变量(如 cr),对应 token 就从 p 中扣除,避免重复计费。没用到的子类别 token 则留在 p/c 中按基础价格计费。
|
||||||
|
|
||||||
|
重要:len 不受自动排除影响。阶梯条件应使用 len 而非 p,以避免缓存命中导致 p 降低而误判档位。
|
||||||
|
|
||||||
|
### 内置函数
|
||||||
|
|
||||||
|
- tier(name, value) — 标记计费档位名称,必须包裹费用表达式
|
||||||
|
- max(a, b)、min(a, b) — 取大/小值
|
||||||
|
- ceil(x)、floor(x)、abs(x) — 向上取整、向下取整、绝对值
|
||||||
|
- header(name) — 读取请求头
|
||||||
|
- param(path) — 读取请求体 JSON 路径(gjson 语法)
|
||||||
|
- has(source, substr) — 子字符串检查
|
||||||
|
- hour(tz)、minute(tz)、weekday(tz)、month(tz)、day(tz) — 时间函数,tz 为时区如 "Asia/Shanghai"
|
||||||
|
|
||||||
|
### 价格系数
|
||||||
|
|
||||||
|
表达式中的数字系数是 $/1M tokens 的价格。例如 p * 2.5 表示输入 $2.50/1M tokens。
|
||||||
|
|
||||||
|
## 表达式示例
|
||||||
|
|
||||||
|
简单定价:
|
||||||
|
tier("base", p * 2.5 + c * 15)
|
||||||
|
|
||||||
|
带缓存的定价:
|
||||||
|
tier("base", p * 2.5 + c * 15 + cr * 0.25)
|
||||||
|
|
||||||
|
多档阶梯(用 len 做条件):
|
||||||
|
len <= 200000
|
||||||
|
? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6)
|
||||||
|
: tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12)
|
||||||
|
|
||||||
|
图片模型:
|
||||||
|
tier("base", p * 2 + c * 8 + img * 2.5)
|
||||||
|
|
||||||
|
多模态含音频:
|
||||||
|
tier("base", p * 0.43 + c * 3.06 + img * 0.78 + ai * 3.81 + ao * 15.11)
|
||||||
|
|
||||||
|
三档阶梯示例:
|
||||||
|
len <= 128000
|
||||||
|
? tier("standard", p * 1.1 + c * 4.4)
|
||||||
|
: (len <= 1000000
|
||||||
|
? tier("medium", p * 2.2 + c * 8.8)
|
||||||
|
: tier("long", p * 4.4 + c * 17.6))
|
||||||
|
|
||||||
|
## 规则
|
||||||
|
|
||||||
|
1. 每个叶子分支必须用 tier("名称", 费用表达式) 包裹
|
||||||
|
2. tier 名称用英文,如 "base"、"standard"、"long_context"
|
||||||
|
3. 阶梯条件用 len(不要用 p),支持 <、<=、>、>=
|
||||||
|
4. 多档用嵌套三元运算符:条件1 ? tier(...) : (条件2 ? tier(...) : tier(...))
|
||||||
|
5. 价格系数直接写供应商官方 $/1M tokens 价格
|
||||||
|
6. 不需要缓存/图片/音频单独定价时可以不写对应变量,它们的 token 会自动包含在 p/c 中
|
||||||
|
|
||||||
|
请根据用户提供的模型信息和定价需求,生成计费表达式。`;
|
||||||
|
|
||||||
|
function LlmPromptHelper({ t, model }) {
|
||||||
|
const [open, setOpen] = useState(false);
|
||||||
|
|
||||||
|
const modelName = model?.name || '';
|
||||||
|
const prompt = useMemo(() => {
|
||||||
|
if (modelName) {
|
||||||
|
return LLM_PROMPT_TEMPLATE + `\n\n当前模型:${modelName}`;
|
||||||
|
}
|
||||||
|
return LLM_PROMPT_TEMPLATE;
|
||||||
|
}, [modelName]);
|
||||||
|
|
||||||
|
const handleCopy = useCallback(async () => {
|
||||||
|
const ok = await copy(prompt);
|
||||||
|
if (ok) showSuccess(t('已复制到剪贴板'));
|
||||||
|
}, [prompt, t]);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div style={{ marginBottom: 12 }}>
|
||||||
|
<Button
|
||||||
|
theme='borderless'
|
||||||
|
size='small'
|
||||||
|
icon={<IconCopy />}
|
||||||
|
onClick={() => setOpen(!open)}
|
||||||
|
style={{ color: 'var(--semi-color-tertiary)' }}
|
||||||
|
>
|
||||||
|
{t('LLM 辅助设计提示词')}
|
||||||
|
</Button>
|
||||||
|
<Collapsible isOpen={open}>
|
||||||
|
<Card
|
||||||
|
bodyStyle={{ padding: 12 }}
|
||||||
|
style={{ marginTop: 8, background: 'var(--semi-color-fill-0)' }}
|
||||||
|
>
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 8 }}>
|
||||||
|
<Text size='small' type='secondary'>
|
||||||
|
{t('复制以下提示词发送给 LLM(如 ChatGPT / Claude),让它帮你设计计费表达式')}
|
||||||
|
</Text>
|
||||||
|
<Button
|
||||||
|
icon={<IconCopy />}
|
||||||
|
size='small'
|
||||||
|
theme='light'
|
||||||
|
onClick={handleCopy}
|
||||||
|
>
|
||||||
|
{t('复制提示词')}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
<TextArea
|
||||||
|
value={prompt}
|
||||||
|
readonly
|
||||||
|
autosize={{ minRows: 6, maxRows: 20 }}
|
||||||
|
style={{ fontFamily: 'monospace', fontSize: 12 }}
|
||||||
|
/>
|
||||||
|
</Card>
|
||||||
|
</Collapsible>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Main component
|
// Main component
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -1543,6 +1690,8 @@ export default function TieredPricingEditor({ model, onExprChange, requestRuleEx
|
|||||||
</div>
|
</div>
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
|
<LlmPromptHelper t={t} model={model} />
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user