Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Centralized Request Processing middleware #3847

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
240,024 changes: 240,024 additions & 0 deletions .bruno/LocalAI Test Requests/vad/vad test audio.bru

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions .bruno/LocalAI Test Requests/vad/vad test too few.bru
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
meta {
name: vad test too few
type: http
seq: 1
}

post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/vad
body: json
auth: none
}

headers {
Content-Type: application/json
}

body:json {
{
"model": "silero-vad",
"audio": []
}
}
8 changes: 8 additions & 0 deletions aio/cpu/vad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
2 changes: 1 addition & 1 deletion aio/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size

PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"

check_vars

Expand Down
8 changes: 8 additions & 0 deletions aio/gpu-8g/vad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
8 changes: 8 additions & 0 deletions aio/intel/vad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
8 changes: 1 addition & 7 deletions core/application/startup.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,7 @@ func New(opts ...config.AppOption) (*Application, error) {

if options.LoadToMemory != nil {
for _, m := range options.LoadToMemory {
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
config.LoadOptionDebug(options.Debug),
config.LoadOptionThreads(options.Threads),
config.LoadOptionContextSize(options.ContextSize),
config.LoadOptionF16(options.F16),
config.ModelPath(options.ModelPath),
)
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
if err != nil {
return nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions core/backend/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type TokenUsage struct {
Completion int
}

func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model

// Check if the modelFile exists, if it doesn't try to load it from the gallery
Expand All @@ -46,7 +46,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
}
}

opts := ModelOptions(c, o)
opts := ModelOptions(*c, o)
inferenceModel, err := loader.Load(opts...)
if err != nil {
return nil, err
Expand Down Expand Up @@ -82,7 +82,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im

// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts := gRPCPredictOpts(*c, loader.ModelPath)
opts.Prompt = s
opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
Expand Down
6 changes: 3 additions & 3 deletions core/backend/rerank.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ import (
model "github.com/mudler/LocalAI/pkg/model"
)

func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig)
rerankModel, err := loader.Load(opts...)

if err != nil {
return nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions core/backend/soundgeneration.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
)

func SoundGeneration(
modelFile string,
text string,
duration *float32,
temperature *float32,
Expand All @@ -25,8 +24,9 @@ func SoundGeneration(
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)
soundGenModel, err := loader.Load(opts...)

if err != nil {
return "", nil, err
}
Expand All @@ -44,7 +44,7 @@ func SoundGeneration(

res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelFile,
Model: backendConfig.Model,
Dst: filePath,
Sample: doSample,
Duration: duration,
Expand Down
7 changes: 3 additions & 4 deletions core/backend/tokenize.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
)

func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {

modelFile := backendConfig.Model

var inferenceModel grpc.Backend
var err error

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)

// TODO: looks weird, seems to be a correct merge?
if backendConfig.Backend == "" {
inferenceModel, err = loader.Load(opts...)
} else {
Expand Down
2 changes: 1 addition & 1 deletion core/backend/transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.Segment{
schema.TranscriptionSegment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),
Expand Down
39 changes: 16 additions & 23 deletions core/backend/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,22 @@ import (
)

func ModelTTS(
backend,
text,
modelFile,
voice,
language string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}

opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
ttsModel, err := loader.Load(opts...)

if err != nil {
return "", nil, err
}

if ttsModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
}

if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
Expand All @@ -45,22 +39,21 @@ func ModelTTS(
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)

// If the model file is not empty, we pass it joined with the model path
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
// This should be addressed in a follow up PR soon.
// Copying it over nearly verbatim, as TTS backends are not functional without this.
modelPath := ""
if modelFile != "" {
// If the model file is not empty, we pass it joined with the model path
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, modelFile)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = modelFile
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = backendConfig.Model // skip this step if it fails?????
}

res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
Expand Down
38 changes: 38 additions & 0 deletions core/backend/vad.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package backend

import (
"context"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)

func VAD(request *schema.VADRequest,
ctx context.Context,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
opts := ModelOptions(backendConfig, appConfig)
vadModel, err := ml.Load(opts...)
if err != nil {
return nil, err
}
req := proto.VADRequest{
Audio: request.Audio,
}
resp, err := vadModel.VAD(ctx, &req)
if err != nil {
return nil, err
}

segments := []schema.VADSegment{}
for _, s := range resp.Segments {
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
}

return &schema.VADResponse{
Segments: segments,
}, nil
}
3 changes: 2 additions & 1 deletion core/cli/soundgeneration.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model

var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}

filePath, _, err := backend.SoundGeneration(t.Model, text,
filePath, _, err := backend.SoundGeneration(text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)

Expand Down
4 changes: 3 additions & 1 deletion core/cli/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {

options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model

filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
if err != nil {
return err
}
Expand Down
39 changes: 28 additions & 11 deletions core/config/backend_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,19 +441,21 @@ func (c *BackendConfig) HasTemplate() bool {
type BackendConfigUsecases int

const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
FLAG_ANY BackendConfigUsecases = 0b00000000000
FLAG_CHAT BackendConfigUsecases = 0b00000000001
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
FLAG_EDIT BackendConfigUsecases = 0b00000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
FLAG_RERANK BackendConfigUsecases = 0b00000010000
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
FLAG_TTS BackendConfigUsecases = 0b00010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
FLAG_VAD BackendConfigUsecases = 0b10000000000

// Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)

func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
Expand All @@ -468,6 +470,8 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_TOKENIZE": FLAG_TOKENIZE,
"FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM,
}
}
Expand Down Expand Up @@ -553,5 +557,18 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
}
}

if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
return false
}
}

if (u & FLAG_VAD) == FLAG_VAD {
if c.Backend != "silero-vad" {
return false
}
}

return true
}
Loading
Loading