Skip to content

Commit

Permalink
Merge pull request #826 from Calcium-Ion/cache
Browse files Browse the repository at this point in the history
feat: Add prompt cache hit tokens support for DeepSeek channel #406
  • Loading branch information
Calcium-Ion authored Mar 8, 2025
2 parents 618908f + 8c209e2 commit d9390ff
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 17 deletions.
1 change: 1 addition & 0 deletions dto/openai_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ type Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
}
6 changes: 6 additions & 0 deletions relay/channel/openai/relay-openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,12 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
if !containStreamUsage {
usage, _ = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens)
usage.CompletionTokens += toolCount * 7
} else {
if info.ChannelType == common.ChannelTypeDeepSeek {
if usage.PromptCacheHitTokens != 0 {
usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
}
}
}

if info.ShouldIncludeUsage && !containStreamUsage {
Expand Down
17 changes: 9 additions & 8 deletions relay/relay-text.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,19 +320,20 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
groupRatio := priceData.GroupRatio
modelPrice := priceData.ModelPrice

quota := 0
quotaCalculate := 0.0
if !priceData.UsePrice {
quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio))
quota += int(math.Round(float64(completionTokens) * completionRatio))
quota = int(math.Round(float64(quota) * ratio))
if ratio != 0 && quota <= 0 {
quota = 1
quotaCalculate = float64(promptTokens-cacheTokens) + float64(cacheTokens)*cacheRatio
quotaCalculate += float64(completionTokens) * completionRatio
quotaCalculate = quotaCalculate * ratio
if ratio != 0 && quotaCalculate <= 0 {
quotaCalculate = 1
}
} else {
quota = int(modelPrice * common.QuotaPerUnit * groupRatio)
quotaCalculate = modelPrice * common.QuotaPerUnit * groupRatio
}
quota := int(quotaCalculate)
totalTokens := promptTokens + completionTokens

var logContent string
if !priceData.UsePrice {
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
Expand Down
13 changes: 7 additions & 6 deletions service/quota.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"github.com/bytedance/gopkg/util/gopool"
"math"
"one-api/common"
constant2 "one-api/constant"
"one-api/dto"
Expand Down Expand Up @@ -44,16 +43,18 @@ func calculateAudioQuota(info QuotaInfo) int {
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
ratio := info.GroupRatio * info.ModelRatio

quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
quota += int(math.Round(float64(info.InputDetails.AudioTokens)*audioRatio)) +
int(math.Round(float64(info.OutputDetails.AudioTokens)*audioRatio*audioCompletionRatio))
quota := 0.0
quota += float64(info.InputDetails.TextTokens)
quota += float64(info.OutputDetails.TextTokens) * completionRatio
quota += float64(info.InputDetails.AudioTokens) * audioRatio
quota += float64(info.OutputDetails.AudioTokens) * audioRatio * audioCompletionRatio

quota = int(math.Round(float64(quota) * ratio))
quota = quota * ratio
if ratio != 0 && quota <= 0 {
quota = 1
}

return quota
return int(quota)
}

func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage) error {
Expand Down
6 changes: 3 additions & 3 deletions setting/operation_setting/cache_ratio.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ var defaultCacheRatio = map[string]float64{
"gpt-4o-mini-2024-07-18": 0.5,
"gpt-4o-realtime-preview": 0.5,
"gpt-4o-mini-realtime-preview": 0.5,
"deepseek-chat": 0.5,
"deepseek-reasoner": 0.5,
"deepseek-coder": 0.5,
"deepseek-chat": 0.1,
"deepseek-reasoner": 0.1,
"deepseek-coder": 0.1,
}

// defaultCreateCacheRatio is a string-keyed table of float64 ratios that is
// declared empty here, i.e. it ships with no built-in entries.
// NOTE(review): presumably keyed by model name like defaultCacheRatio, and
// used for cache-creation (write) pricing — confirm against the callers.
var defaultCreateCacheRatio = map[string]float64{}
Expand Down

0 comments on commit d9390ff

Please sign in to comment.