diff --git a/dto/audio.go b/dto/audio.go index e3569172..e0d4f9d0 100644 --- a/dto/audio.go +++ b/dto/audio.go @@ -18,6 +18,16 @@ type AudioRequest struct { Speed *float64 `json:"speed,omitempty"` StreamFormat string `json:"stream_format,omitempty"` Metadata json.RawMessage `json:"metadata,omitempty"` + // vllm-omni + TaskType json.RawMessage `json:"task_type,omitempty"` + Language json.RawMessage `json:"language,omitempty"` + RefAudio json.RawMessage `json:"ref_audio,omitempty"` + RefText json.RawMessage `json:"ref_text,omitempty"` + XVectorOnlyMode json.RawMessage `json:"x_vector_only_mode,omitempty"` + MaxNewTokens json.RawMessage `json:"max_new_tokens,omitempty"` + InitialCodecChunkFrames json.RawMessage `json:"initial_codec_chunk_frames,omitempty"` + // TODO: ensure that the logic remains correct after the stream is started. + //Stream json.RawMessage `json:"stream,omitempty"` } func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta { diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index 29a8f349..2b7f71d6 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -369,7 +369,7 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) { a.ResponseFormat = request.ResponseFormat if info.RelayMode == relayconstant.RelayModeAudioSpeech { - jsonData, err := json.Marshal(request) + jsonData, err := common.Marshal(request) if err != nil { return nil, fmt.Errorf("error marshalling object: %w", err) }