Enable more logging, only describe image once when walking history

Signed-off-by: mudler <mudler@localai.io>
This commit is contained in:
mudler
2025-03-09 18:50:50 +01:00
committed by Ettore Di Giacinto
parent 28e80084f6
commit bc60dde94f
2 changed files with 15 additions and 2 deletions

View File

@@ -150,6 +150,14 @@ func (m Messages) GetLatestUserMessage() *openai.ChatCompletionMessage {
return nil return nil
} }
// IsLastMessageFromRole reports whether the most recent message in the
// conversation was authored by the given role. An empty conversation has
// no last message, so it always reports false.
func (m Messages) IsLastMessageFromRole(role string) bool {
	if last := len(m) - 1; last >= 0 {
		return m[last].Role == role
	}
	return false
}
func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act Action, c []openai.ChatCompletionMessage, reasoning string) (*decisionResult, error) { func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act Action, c []openai.ChatCompletionMessage, reasoning string) (*decisionResult, error) {
stateHUD, err := renderTemplate(pickTemplate, a.prepareHUD(), a.systemInternalActions(), reasoning) stateHUD, err := renderTemplate(pickTemplate, a.prepareHUD(), a.systemInternalActions(), reasoning)

View File

@@ -286,9 +286,11 @@ func (a *Agent) processPrompts() {
} }
func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (string, error) { func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (string, error) {
xlog.Debug("Describing image", "model", model, "image", imageURL)
resp, err := a.client.CreateChatCompletion(ctx, resp, err := a.client.CreateChatCompletion(ctx,
openai.ChatCompletionRequest{ openai.ChatCompletionRequest{
Model: model, Messages: []openai.ChatCompletionMessage{ Model: model,
Messages: []openai.ChatCompletionMessage{
{ {
Role: "user", Role: "user",
@@ -300,6 +302,7 @@ func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (stri
{ {
Type: openai.ChatMessagePartTypeImageURL, Type: openai.ChatMessagePartTypeImageURL,
ImageURL: &openai.ChatMessageImageURL{ ImageURL: &openai.ChatMessageImageURL{
URL: imageURL, URL: imageURL,
}, },
}, },
@@ -313,6 +316,7 @@ func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (stri
return "", fmt.Errorf("no choices") return "", fmt.Errorf("no choices")
} }
xlog.Debug("Described image", "description", resp.Choices[0].Message.Content)
return resp.Choices[0].Message.Content, nil return resp.Choices[0].Message.Content, nil
} }
@@ -343,7 +347,7 @@ func (a *Agent) processUserInputs(job *Job, role string) {
// and add it to the conversation context // and add it to the conversation context
if a.options.SeparatedMultimodalModel() && noNewMessage { if a.options.SeparatedMultimodalModel() && noNewMessage {
lastUserMessage := a.currentConversation.GetLatestUserMessage() lastUserMessage := a.currentConversation.GetLatestUserMessage()
if lastUserMessage != nil { if lastUserMessage != nil && a.currentConversation.IsLastMessageFromRole(UserRole) {
imageURL, text, err := extractImageContent(*lastUserMessage) imageURL, text, err := extractImageContent(*lastUserMessage)
if err == nil { if err == nil {
// We have an image, we need to describe it first // We have an image, we need to describe it first
@@ -361,6 +365,7 @@ func (a *Agent) processUserInputs(job *Job, role string) {
Role: role, Role: role,
Content: text, Content: text,
}) })
xlog.Debug("Conversation after image description", "conversation", a.currentConversation)
} }
} }
} }