Files
LocalAGI/agent/agent.go
2024-04-03 20:03:51 +02:00

356 lines
9.8 KiB
Go

package agent
import (
"context"
"fmt"
"strings"
"sync"
"time"
"github.com/mudler/local-agent-framework/action"
"github.com/mudler/local-agent-framework/llm"
"github.com/sashabaranov/go-openai"
)
// pickActionTemplate is the prompt template handed to pickAction when
// consumeJob has to choose the first action for a job.
//
// The template reads .Actions (each element providing .Name and .Description)
// and .Messages (each element providing .Role, .FunctionCall, .ToolCalls and
// .Content). Function calls are rendered in place of the message content.
//
// NOTE(review): inside the .ToolCalls range the template reads .Name and
// .Arguments directly on each element; confirm the element type exposes those
// fields at that level (go-openai's ToolCall nests them under .Function).
const pickActionTemplate = `You can take any of the following tools:
{{range .Actions -}}
- {{.Name}}: {{.Description }}
{{ end }}
To answer back to the user, use the "reply" tool.
Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.
{{range .Messages -}}
{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
{{end}}
`
// reEvalTemplate is the prompt template handed to pickAction after an action
// has been executed by consumeJob. It lists the same tools and conversation as
// pickActionTemplate and ends with an extra instruction asking the model to
// decide whether more tools are needed or a result can be returned.
//
// NOTE(review): inside the .ToolCalls range the template reads .Name and
// .Arguments directly on each element; confirm the element type exposes those
// fields at that level (go-openai's ToolCall nests them under .Function).
const reEvalTemplate = `You can take any of the following tools:
{{range .Actions -}}
- {{.Name}}: {{.Description }}
{{ end }}
To answer back to the user, use the "reply" tool.
Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.
{{range .Messages -}}
{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
{{end}}
We already have called tools. Evaluate the current situation and decide if we need to execute other tools or answer back with a result.`
// Role names attached to the messages that consumeJob appends to the
// conversation.
const (
// UserRole is used for jobs received on the agent's regular job queue.
UserRole = "user"
// AssistantRole is the role name for assistant-authored messages.
AssistantRole = "assistant"
// SystemRole is used for jobs from the self job queue and for the prompts
// issued by periodicallyRun.
SystemRole = "system"
)
// Agent consumes jobs from its queues, lets the LLM pick and parameterise
// actions, runs them, and keeps the resulting conversation.
type Agent struct {
	// Embedded lock taken by StopAction, Stop, CurrentConversation,
	// ResetConversation and when consumeJob installs a new action context.
	sync.Mutex

	// options holds the configuration produced by newOptions; Character and
	// client are derived from it in New.
	options   *options
	Character Character
	client    *openai.Client

	// jobQueue receives jobs submitted through Ask (handled with UserRole);
	// selfJobQueue carries jobs that Run handles with SystemRole.
	jobQueue     chan *Job
	selfJobQueue chan *Job

	// actionContext is replaced for every consumeJob call and canceled by
	// StopAction; context spans the whole agent and is canceled by Stop.
	actionContext *action.ActionContext
	context       *action.ActionContext

	// availableActions is initialised from options.actions in New.
	availableActions []Action

	// currentReasoning and nextAction carry the follow-up decision from one
	// consumeJob call into the recursive call that executes it.
	currentReasoning string
	nextAction       Action

	// currentConversation is the message history sent to the LLM.
	currentConversation []openai.ChatCompletionMessage
}
// New builds an Agent from the given options.
//
// It creates the LLM client from the configured API key and URL and derives a
// cancellable agent context from the option-supplied context
// (context.Background when none was provided). When the randomIdentity option
// is set, the agent identity is generated before returning.
//
// Errors are wrapped with %w so callers can inspect the cause with errors.Is
// and errors.As. On an options error the returned agent is nil. If identity
// generation fails, the agent built so far is still returned together with
// the error, as it always has been.
func New(opts ...Option) (*Agent, error) {
	options, err := newOptions(opts...)
	if err != nil {
		return nil, fmt.Errorf("failed to set options: %w", err)
	}

	client := llm.NewClient(options.LLMAPI.APIKey, options.LLMAPI.APIURL)

	parent := context.Background()
	if options.context != nil {
		parent = options.context
	}
	ctx, cancel := context.WithCancel(parent)

	a := &Agent{
		jobQueue:         make(chan *Job),
		selfJobQueue:     make(chan *Job),
		options:          options,
		client:           client,
		Character:        options.character,
		context:          action.NewContext(ctx, cancel),
		availableActions: options.actions,
	}

	if a.options.randomIdentity {
		if err = a.generateIdentity(a.options.randomIdentityGuidance); err != nil {
			return a, fmt.Errorf("failed to generate identity: %w", err)
		}
	}

	return a, nil
}
// StopAction cancels the context of the action currently in progress, if one
// has been installed. It is safe to call before queueing a new job.
func (a *Agent) StopAction() {
	a.Lock()
	defer a.Unlock()

	// Nothing has run yet, so there is nothing to cancel.
	if a.actionContext == nil {
		return
	}
	a.actionContext.Cancel()
}
// Ask is a pre-emptive, blocking call that returns the response as soon as
// it's ready. It cancels the action currently in progress (see StopAction),
// queues a new job built from opts and waits for its result.
//
// The job queue is unbuffered, so the job is only handed over while Run is
// receiving. If the agent context is canceled before the job is accepted,
// Ask returns nil instead of blocking forever.
func (a *Agent) Ask(opts ...JobOption) []ActionState {
	a.StopAction()
	j := NewJob(opts...)

	select {
	case a.jobQueue <- j:
	case <-a.context.Done():
		// The agent was stopped: nobody will ever pick the job up.
		return nil
	}

	return j.Result.WaitResult()
}
// CurrentConversation returns the agent's conversation slice, read while
// holding the agent lock. The slice is returned as-is, not copied.
func (a *Agent) CurrentConversation() []openai.ChatCompletionMessage {
	a.Lock()
	conversation := a.currentConversation
	a.Unlock()

	return conversation
}
// ResetConversation replaces the agent's conversation with an empty, non-nil
// slice while holding the agent lock.
func (a *Agent) ResetConversation() {
	a.Lock()
	a.currentConversation = make([]openai.ChatCompletionMessage, 0)
	a.Unlock()
}
// ErrContextCanceled is returned by Run when the agent's context is done.
var ErrContextCanceled = fmt.Errorf("context canceled")
// Stop cancels the agent-wide context while holding the agent lock. Run
// returns ErrContextCanceled once it observes the cancellation.
func (a *Agent) Stop() {
	a.Lock()
	defer a.Unlock()

	agentCtx := a.context
	agentCtx.Cancel()
}
// consumeJob processes one job end to end and records the outcome on
// job.Result.
//
// Steps:
//  1. Install a fresh per-job action context so StopAction can interrupt it.
//  2. Append job.Text (if any) to the conversation using the given role.
//  3. Pick an action: reuse the one stored by a previous re-evaluation, or ask
//     the LLM through pickAction with pickActionTemplate.
//  4. If there is no action, or it is the reply action, finish immediately.
//  5. Generate parameters, give job.Callback a chance to veto, run the action
//     and record the call and its result in the conversation.
//  6. Re-evaluate with reEvalTemplate. If another non-reply action is wanted,
//     store it and recurse with an empty job.Text; otherwise request a final
//     completion, append it to the conversation and finish the job.
//
// The conversation is only touched while holding the agent lock, matching
// CurrentConversation and ResetConversation, which other goroutines may call
// while a job is running. The lock is never held across LLM or action calls.
//
// NOTE(review): the action context is rooted at context.Background(), not at
// the agent context, so Stop alone does not interrupt an in-flight job —
// confirm this is intended.
func (a *Agent) consumeJob(job *Job, role string) {
	// Set the action context.
	ctx, cancel := context.WithCancel(context.Background())
	a.Lock()
	a.actionContext = action.NewContext(ctx, cancel)
	a.Unlock()

	// appendMessage adds msg to the shared conversation under the agent lock.
	appendMessage := func(msg openai.ChatCompletionMessage) {
		a.Lock()
		defer a.Unlock()
		a.currentConversation = append(a.currentConversation, msg)
	}

	// TODO: when job.Image is set, use llava to explain the image content.

	if job.Text != "" {
		appendMessage(openai.ChatCompletionMessage{
			Role:    role,
			Content: job.Text,
		})
	}

	// Choose an action first.
	var chosenAction Action
	var reasoning string

	if a.currentReasoning != "" && a.nextAction != nil {
		// We are being re-evaluated: the action and its reasoning were
		// already decided by the previous call. Consume and reset them.
		chosenAction = a.nextAction
		reasoning = a.currentReasoning
		a.currentReasoning = ""
		a.nextAction = nil
	} else {
		var err error
		chosenAction, reasoning, err = a.pickAction(ctx, pickActionTemplate, a.CurrentConversation())
		if err != nil {
			job.Result.Finish(err)
			return
		}
	}

	if chosenAction == nil || chosenAction.Definition().Name.Is(action.ReplyActionName) {
		job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""})
		job.Result.Finish(nil)
		return
	}

	params, err := a.generateParameters(ctx, chosenAction, a.CurrentConversation())
	if err != nil {
		job.Result.Finish(err)
		return
	}

	// The callback may veto the action before it runs.
	if !job.Callback(ActionCurrentState{chosenAction, params.actionParams, reasoning}) {
		job.Result.SetResult(ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, "stopped by callback"})
		job.Result.Finish(nil)
		return
	}

	if params.actionParams == nil {
		job.Result.Finish(fmt.Errorf("no parameters"))
		return
	}

	// Run every configured action whose name matches the chosen one. The loop
	// variable is deliberately not called "action" so the imported action
	// package stays visible.
	var result string
	for _, candidate := range a.options.actions {
		if candidate.Definition().Name == chosenAction.Definition().Name {
			if result, err = candidate.Run(params.actionParams); err != nil {
				job.Result.Finish(fmt.Errorf("error running action: %w", err))
				return
			}
		}
	}

	stateResult := ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, result}
	job.Result.SetResult(stateResult)
	job.CallbackWithResult(stateResult)

	// Record the function call...
	appendMessage(openai.ChatCompletionMessage{
		Role: "assistant",
		FunctionCall: &openai.FunctionCall{
			Name:      chosenAction.Definition().Name.String(),
			Arguments: params.actionParams.String(),
		},
	})

	// ...and the result of calling the function.
	appendMessage(openai.ChatCompletionMessage{
		Role:       openai.ChatMessageRoleTool,
		Content:    result,
		Name:       chosenAction.Definition().Name.String(),
		ToolCallID: chosenAction.Definition().Name.String(),
	})

	// Given the result, ask the LLM whether to complete the conversation or
	// to continue with another tool.
	followingAction, reasoning, err := a.pickAction(ctx, reEvalTemplate, a.CurrentConversation())
	if err != nil {
		job.Result.Finish(fmt.Errorf("error picking action: %w", err))
		return
	}

	// chosenAction cannot be the reply action here (that case returned
	// above), so only the follow-up action needs checking.
	if followingAction != nil && !followingAction.Definition().Name.Is(action.ReplyActionName) {
		// The agent decided to do another action: stash the decision and
		// call ourselves again without re-adding the job text.
		a.currentReasoning = reasoning
		a.nextAction = followingAction
		job.Text = ""
		a.consumeJob(job, role)
		return
	}

	// Generate a human-readable response.
	resp, err := a.client.CreateChatCompletion(ctx,
		openai.ChatCompletionRequest{
			Model:    a.options.LLMAPI.Model,
			Messages: a.CurrentConversation(),
		},
	)
	if err != nil {
		job.Result.Finish(err)
		return
	}

	// err is nil at this point, so it must not be wrapped into the message.
	if len(resp.Choices) != 1 {
		job.Result.Finish(fmt.Errorf("unexpected number of choices: got %d, want 1", len(resp.Choices)))
		return
	}

	// Keep the model's answer in the conversation.
	appendMessage(resp.Choices[0].Message)

	job.Result.Finish(nil)
}
// periodicallyRun drives one autonomous cycle of the agent. Every prompt is
// consumed with SystemRole, in this order:
//
//  1. ask whether anything from the current conversation should be stored,
//     then reset the conversation;
//  2. ask what to do next, then ask the agent to pursue its goals;
//  3. reset the conversation and ask for an evaluation of the results that
//     step 2 produced, then reset the conversation once more.
func (a *Agent) periodicallyRun() {
	// Give the LLM a chance to persist parts of the conversation to memory.
	memoryJob := NewJob(WithText(`Evaluate the current conversation and decide if we need to store some relevant informations from it`))
	a.consumeJob(memoryJob, SystemRole)
	a.ResetConversation()

	// Ask for a next step, then let the agent act on its goals.
	nextStepJob := NewJob(WithText("What should I do next?"))
	a.consumeJob(nextStepJob, SystemRole)

	workJob := NewJob(WithText("Try to fullfill our goals automatically"))
	a.consumeJob(workJob, SystemRole)

	// Collect the textual result of every state recorded for the work job.
	var outcomes []string
	for _, state := range workJob.Result.State {
		outcomes = append(outcomes, state.Result)
	}
	a.ResetConversation()

	// Let the LLM react to the outcome of the automatic actions.
	evaluationPrompt := `Evaluate the current situation and decide if we need to execute other tools (for instance to store results into permanent, or short memory).
We have done the following actions:
` + strings.Join(outcomes, "\n")
	evaluationJob := NewJob(WithText(evaluationPrompt))
	a.consumeJob(evaluationJob, SystemRole)
	a.ResetConversation()
}
// Run is the agent's main loop. It blocks until the agent context is done and
// then returns ErrContextCanceled.
//
// While running it:
//   - consumes jobs from selfJobQueue with SystemRole;
//   - consumes jobs from jobQueue (fed by Ask) with UserRole;
//   - once a minute, when the ticker fires, calls periodicallyRun so the
//     agent can act on its own.
//
// Jobs are handled one at a time on the calling goroutine; pre-emption
// happens through StopAction, which Ask invokes before queueing a job.
func (a *Agent) Run() error {
	todoTimer := time.NewTicker(1 * time.Minute)
	// Release the ticker when the loop exits; otherwise it keeps firing.
	defer todoTimer.Stop()

	for {
		select {
		case job := <-a.selfJobQueue:
			// XXX: is it needed?
			a.consumeJob(job, SystemRole)
		case job := <-a.jobQueue:
			// Consume the job and generate a response.
			// TODO: Give a short-term memory to the agent
			a.consumeJob(job, UserRole)
		case <-a.context.Done():
			// Agent has been canceled, return error.
			return ErrContextCanceled
		case <-todoTimer.C:
			a.periodicallyRun()
		}
	}
}