Enable more logging, only describe image once when walking history

Signed-off-by: mudler <mudler@localai.io>
This commit is contained in:
mudler
2025-03-09 18:50:50 +01:00
committed by Ettore Di Giacinto
parent 28e80084f6
commit bc60dde94f
2 changed files with 15 additions and 2 deletions

View File

@@ -150,6 +150,14 @@ func (m Messages) GetLatestUserMessage() *openai.ChatCompletionMessage {
return nil
}
// IsLastMessageFromRole reports whether the final message in m carries the
// given role. It returns false when m holds no messages.
func (m Messages) IsLastMessageFromRole(role string) bool {
	n := len(m)
	// Short-circuit keeps the index expression from running on an empty slice.
	return n > 0 && m[n-1].Role == role
}
func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act Action, c []openai.ChatCompletionMessage, reasoning string) (*decisionResult, error) {
stateHUD, err := renderTemplate(pickTemplate, a.prepareHUD(), a.systemInternalActions(), reasoning)

View File

@@ -286,9 +286,11 @@ func (a *Agent) processPrompts() {
}
func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (string, error) {
xlog.Debug("Describing image", "model", model, "image", imageURL)
resp, err := a.client.CreateChatCompletion(ctx,
openai.ChatCompletionRequest{
Model: model, Messages: []openai.ChatCompletionMessage{
Model: model,
Messages: []openai.ChatCompletionMessage{
{
Role: "user",
@@ -300,6 +302,7 @@ func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (stri
{
Type: openai.ChatMessagePartTypeImageURL,
ImageURL: &openai.ChatMessageImageURL{
URL: imageURL,
},
},
@@ -313,6 +316,7 @@ func (a *Agent) describeImage(ctx context.Context, model, imageURL string) (stri
return "", fmt.Errorf("no choices")
}
xlog.Debug("Described image", "description", resp.Choices[0].Message.Content)
return resp.Choices[0].Message.Content, nil
}
@@ -343,7 +347,7 @@ func (a *Agent) processUserInputs(job *Job, role string) {
// and add it to the conversation context
if a.options.SeparatedMultimodalModel() && noNewMessage {
lastUserMessage := a.currentConversation.GetLatestUserMessage()
if lastUserMessage != nil {
if lastUserMessage != nil && a.currentConversation.IsLastMessageFromRole(UserRole) {
imageURL, text, err := extractImageContent(*lastUserMessage)
if err == nil {
// We have an image, we need to describe it first
@@ -361,6 +365,7 @@ func (a *Agent) processUserInputs(job *Job, role string) {
Role: role,
Content: text,
})
xlog.Debug("Conversation after image description", "conversation", a.currentConversation)
}
}
}