Skip to content

Commit

Permalink
Centralized Request Middleware - also adds VAD tests
Browse files Browse the repository at this point in the history
Signed-off-by: Dave Lee <[email protected]>
  • Loading branch information
dave-gray101 committed Dec 17, 2024
1 parent 708cba0 commit d08311f
Show file tree
Hide file tree
Showing 56 changed files with 481,022 additions and 803 deletions.
240,024 changes: 240,024 additions & 0 deletions .bruno/LocalAI Test Requests/vad/vad test audio.bru

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions .bruno/LocalAI Test Requests/vad/vad test too few.bru
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
meta {
name: vad test too few
type: http
seq: 1
}

post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/vad
body: json
auth: none
}

headers {
Content-Type: application/json
}

body:json {
{
"model": "silero-vad",
"audio": []
}
}
8 changes: 8 additions & 0 deletions aio/cpu/vad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Model definition for the silero-vad backend, stored under the cpu profile (aio/cpu).
backend: silero-vad
name: silero-vad
parameters:
# Model file name; same value as the filename entry under download_files below.
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
# NOTE(review): presumably the expected SHA-256 digest of the downloaded file — confirm against the loader.
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
2 changes: 1 addition & 1 deletion aio/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size

PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"

check_vars

Expand Down
8 changes: 8 additions & 0 deletions aio/gpu-8g/vad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Model definition for the silero-vad backend, stored under the gpu-8g profile (aio/gpu-8g).
backend: silero-vad
name: silero-vad
parameters:
# Model file name; same value as the filename entry under download_files below.
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
# NOTE(review): presumably the expected SHA-256 digest of the downloaded file — confirm against the loader.
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
8 changes: 8 additions & 0 deletions aio/intel/vad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Model definition for the silero-vad backend, stored under the intel profile (aio/intel).
backend: silero-vad
name: silero-vad
parameters:
# Model file name; same value as the filename entry under download_files below.
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
# NOTE(review): presumably the expected SHA-256 digest of the downloaded file — confirm against the loader.
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
8 changes: 1 addition & 7 deletions core/application/startup.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,7 @@ func New(opts ...config.AppOption) (*Application, error) {

if options.LoadToMemory != nil {
for _, m := range options.LoadToMemory {
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
config.LoadOptionDebug(options.Debug),
config.LoadOptionThreads(options.Threads),
config.LoadOptionContextSize(options.ContextSize),
config.LoadOptionF16(options.F16),
config.ModelPath(options.ModelPath),
)
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
if err != nil {
return nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions core/backend/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type TokenUsage struct {
Completion int
}

func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model

// Check if the modelFile exists, if it doesn't try to load it from the gallery
Expand All @@ -46,7 +46,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
}
}

opts := ModelOptions(c, o)
opts := ModelOptions(*c, o)
inferenceModel, err := loader.Load(opts...)
if err != nil {
return nil, err
Expand Down Expand Up @@ -82,7 +82,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im

// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
fn := func() (LLMResponse, error) {
opts := gRPCPredictOpts(c, loader.ModelPath)
opts := gRPCPredictOpts(*c, loader.ModelPath)
opts.Prompt = s
opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
Expand Down
6 changes: 3 additions & 3 deletions core/backend/rerank.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ import (
model "github.com/mudler/LocalAI/pkg/model"
)

func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig)
rerankModel, err := loader.Load(opts...)

if err != nil {
return nil, err
}
Expand Down
6 changes: 3 additions & 3 deletions core/backend/soundgeneration.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
)

func SoundGeneration(
modelFile string,
text string,
duration *float32,
temperature *float32,
Expand All @@ -25,8 +24,9 @@ func SoundGeneration(
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)
soundGenModel, err := loader.Load(opts...)

if err != nil {
return "", nil, err
}
Expand All @@ -44,7 +44,7 @@ func SoundGeneration(

res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
Text: text,
Model: modelFile,
Model: backendConfig.Model,
Dst: filePath,
Sample: doSample,
Duration: duration,
Expand Down
7 changes: 3 additions & 4 deletions core/backend/tokenize.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model"
)

func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {

modelFile := backendConfig.Model

var inferenceModel grpc.Backend
var err error

opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig)

// TODO: looks weird, seems to be a correct merge?
if backendConfig.Backend == "" {
inferenceModel, err = loader.Load(opts...)
} else {
Expand Down
2 changes: 1 addition & 1 deletion core/backend/transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
tks = append(tks, int(t))
}
tr.Segments = append(tr.Segments,
schema.Segment{
schema.TranscriptionSegment{
Text: s.Text,
Id: int(s.Id),
Start: time.Duration(s.Start),
Expand Down
39 changes: 16 additions & 23 deletions core/backend/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,22 @@ import (
)

func ModelTTS(
backend,
text,
modelFile,
voice,
language string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}

opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
ttsModel, err := loader.Load(opts...)

if err != nil {
return "", nil, err
}

if ttsModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
}

if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
Expand All @@ -45,22 +39,21 @@ func ModelTTS(
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)

// If the model file is not empty, we pass it joined with the model path
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
// This should be addressed in a follow up PR soon.
// Copying it over nearly verbatim, as TTS backends are not functional without this.
modelPath := ""
if modelFile != "" {
// If the model file is not empty, we pass it joined with the model path
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, modelFile)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = modelFile
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = backendConfig.Model // skip this step if it fails?????
}

res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
Expand Down
38 changes: 38 additions & 0 deletions core/backend/vad.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package backend

import (
"context"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)

// VAD runs the backend's VAD (voice activity detection) call on the audio
// carried by request and returns the detected segments.
//
// The model is loaded through ml using the options derived from backendConfig
// and appConfig. request.Audio is forwarded to the backend unchanged, and ctx
// is passed to the backend call. Errors from loading the model or from the
// backend call are returned as-is with a nil response.
//
// The Segments field of the response is always a non-nil slice, even when the
// backend reports no segments.
// NOTE(review): presumably this keeps the JSON encoding an empty array rather
// than null — confirm against schema.VADResponse.
func VAD(request *schema.VADRequest,
	ctx context.Context,
	ml *model.ModelLoader,
	appConfig *config.ApplicationConfig,
	backendConfig config.BackendConfig) (*schema.VADResponse, error) {
	opts := ModelOptions(backendConfig, appConfig)
	vadModel, err := ml.Load(opts...)
	if err != nil {
		return nil, err
	}

	resp, err := vadModel.VAD(ctx, &proto.VADRequest{
		Audio: request.Audio,
	})
	if err != nil {
		return nil, err
	}

	// The final length is known up front, so allocate once instead of growing
	// through repeated append. make with length 0 still yields a non-nil slice.
	segments := make([]schema.VADSegment, 0, len(resp.Segments))
	for _, s := range resp.Segments {
		segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
	}

	return &schema.VADResponse{
		Segments: segments,
	}, nil
}
3 changes: 2 additions & 1 deletion core/cli/soundgeneration.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model

var inputFile *string
if t.InputFile != "" {
inputFile = &t.InputFile
}

filePath, _, err := backend.SoundGeneration(t.Model, text,
filePath, _, err := backend.SoundGeneration(text,
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)

Expand Down
4 changes: 3 additions & 1 deletion core/cli/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {

options := config.BackendConfig{}
options.SetDefaults()
options.Backend = t.Backend
options.Model = t.Model

filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
if err != nil {
return err
}
Expand Down
39 changes: 28 additions & 11 deletions core/config/backend_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,19 +441,21 @@ func (c *BackendConfig) HasTemplate() bool {
type BackendConfigUsecases int

const (
FLAG_ANY BackendConfigUsecases = 0b000000000
FLAG_CHAT BackendConfigUsecases = 0b000000001
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
FLAG_EDIT BackendConfigUsecases = 0b000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
FLAG_RERANK BackendConfigUsecases = 0b000010000
FLAG_IMAGE BackendConfigUsecases = 0b000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
FLAG_TTS BackendConfigUsecases = 0b010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
FLAG_ANY BackendConfigUsecases = 0b00000000000
FLAG_CHAT BackendConfigUsecases = 0b00000000001
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
FLAG_EDIT BackendConfigUsecases = 0b00000000100
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
FLAG_RERANK BackendConfigUsecases = 0b00000010000
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
FLAG_TTS BackendConfigUsecases = 0b00010000000
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
FLAG_VAD BackendConfigUsecases = 0b10000000000

// Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)

func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
Expand All @@ -468,6 +470,8 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
"FLAG_TTS": FLAG_TTS,
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
"FLAG_TOKENIZE": FLAG_TOKENIZE,
"FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM,
}
}
Expand Down Expand Up @@ -553,5 +557,18 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
}
}

if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
return false
}
}

if (u & FLAG_VAD) == FLAG_VAD {
if c.Backend != "silero-vad" {
return false
}
}

return true
}
Loading

0 comments on commit d08311f

Please sign in to comment.