Split character from state
agent/agent.go (238 changed lines)
@@ -11,15 +11,48 @@ import (
	"github.com/sashabaranov/go-openai"
)

const pickActionTemplate = `You can take any of the following tools:

{{range .Actions -}}
- {{.Name}}: {{.Description }}
{{ end }}
To answer back to the user, use the "reply" tool.
Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.

{{range .Messages -}}
{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
{{end}}
`

const reEvalTemplate = `You can take any of the following tools:

{{range .Actions -}}
- {{.Name}}: {{.Description }}
{{ end }}
To answer back to the user, use the "reply" tool.
Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.

{{range .Messages -}}
{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
{{end}}

We already have called tools. Evaluate the current situation and decide if we need to execute other tools or answer back with a result.`

const (
	UserRole      = "user"
	AssistantRole = "assistant"
	SystemRole    = "system"
)
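
These prompts are plain Go text/template strings. Below is a minimal rendering sketch, not part of this commit: actionView, messageView, toolCallView and renderPickPrompt are hypothetical names, the view structs only mirror the fields the templates reference (.Name, .Description, .Role, .Content, .FunctionCall, .ToolCalls), and the snippet is assumed to live in its own file in the same package as the constants above.

import (
	"bytes"
	"text/template"
)

// Hypothetical view types for this sketch only; they mirror the template
// placeholders, not the agent's real data structures.
type actionView struct {
	Name        string
	Description string
}

type toolCallView struct {
	Name      string
	Arguments string
}

type messageView struct {
	Role         string
	Content      string
	FunctionCall string
	ToolCalls    []toolCallView
}

// renderPickPrompt renders pickActionTemplate into the prompt text that would
// be sent to the model when picking an action.
func renderPickPrompt(actions []actionView, messages []messageView) (string, error) {
	tmpl, err := template.New("pick").Parse(pickActionTemplate)
	if err != nil {
		return "", err
	}
	var buf bytes.Buffer
	data := struct {
		Actions  []actionView
		Messages []messageView
	}{Actions: actions, Messages: messages}
	if err := tmpl.Execute(&buf, data); err != nil {
		return "", err
	}
	return buf.String(), nil
}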

type Agent struct {
	sync.Mutex
	options          *options
	Character        Character
	client           *openai.Client
	jobQueue         chan *Job
	actionContext    *action.ActionContext
	context          *action.ActionContext
	availableActions []Action
	options                *options
	Character              Character
	client                 *openai.Client
	jobQueue, selfJobQueue chan *Job
	actionContext          *action.ActionContext
	context                *action.ActionContext
	availableActions       []Action

	currentReasoning string
	nextAction       Action
@@ -45,6 +78,7 @@ func New(opts ...Option) (*Agent, error) {
	ctx, cancel := context.WithCancel(c)
	a := &Agent{
		jobQueue:     make(chan *Job),
		selfJobQueue: make(chan *Job),
		options:      options,
		client:       client,
		Character:    options.character,
@@ -61,10 +95,20 @@ func New(opts ...Option) (*Agent, error) {
	return a, nil
}

// StopAction stops the current action
// if any. Can be called before adding a new job.
func (a *Agent) StopAction() {
	a.Lock()
	defer a.Unlock()
	if a.actionContext != nil {
		a.actionContext.Cancel()
	}
}

// Ask is a pre-emptive, blocking call that returns the response as soon as it's ready.
// It discards any other computation.
func (a *Agent) Ask(opts ...JobOption) []ActionState {
	//a.StopAction()
	a.StopAction()
	j := NewJob(opts...)
	// fmt.Println("Job created", text)
	a.jobQueue <- j
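
As a usage note (not part of the commit): Ask blocks until the job completes and returns the collected ActionStates, and since it now calls StopAction itself, a new question preempts whatever the agent was doing. A minimal caller sketch follows, assuming an *Agent that is already running; askExample is a hypothetical name, while WithText is the job option used elsewhere in this diff.

import "fmt"

// askExample is a hypothetical caller; it assumes a.Run() is already
// executing in another goroutine so the job queue is being drained.
func askExample(a *Agent) {
	// Blocks until the agent has finished acting on the request.
	states := a.Ask(WithText("What's the weather like today?"))
	for _, s := range states {
		fmt.Printf("action step: %+v\n", s)
	}

	// StopAction can also be called directly to cancel an in-flight action
	// before queuing more work.
	a.StopAction()
}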
@@ -92,6 +136,158 @@ func (a *Agent) Stop() {
	a.context.Cancel()
}

func (a *Agent) consumeJob(job *Job, role string) {
	// Consume the job and generate a response
	a.Lock()
	// Set the action context
	ctx, cancel := context.WithCancel(context.Background())
	a.actionContext = action.NewContext(ctx, cancel)
	a.Unlock()

	if job.Image != "" {
		// TODO: Use llava to explain the image content
	}

	if job.Text != "" {
		a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{
			Role:    role,
			Content: job.Text,
		})
	}

	// choose an action first
	var chosenAction Action
	var reasoning string

	if a.currentReasoning != "" && a.nextAction != nil {
		// if we are being re-evaluated, we already have the action
		// and the reasoning. Consume it here and reset it
		chosenAction = a.nextAction
		reasoning = a.currentReasoning
		a.currentReasoning = ""
		a.nextAction = nil
	} else {
		var err error
		chosenAction, reasoning, err = a.pickAction(ctx, pickActionTemplate, a.currentConversation)
		if err != nil {
			job.Result.Finish(err)
			return
		}
	}

	if chosenAction == nil || chosenAction.Definition().Name.Is(action.ReplyActionName) {
		job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""})
		job.Result.Finish(nil)
		return
	}

	params, err := a.generateParameters(ctx, chosenAction, a.currentConversation)
	if err != nil {
		job.Result.Finish(err)
		return
	}

	if !job.Callback(ActionCurrentState{chosenAction, params.actionParams, reasoning}) {
		job.Result.SetResult(ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, "stopped by callback"})
		job.Result.Finish(nil)
		return
	}

	if params.actionParams == nil {
		job.Result.Finish(fmt.Errorf("no parameters"))
		return
	}

	var result string
	for _, action := range a.options.actions {
		if action.Definition().Name == chosenAction.Definition().Name {
			if result, err = action.Run(params.actionParams); err != nil {
				job.Result.Finish(fmt.Errorf("error running action: %w", err))
				return
			}
		}
	}

	stateResult := ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, result}
	job.Result.SetResult(stateResult)
	job.CallbackWithResult(stateResult)

	// calling the function
	a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{
		Role: "assistant",
		FunctionCall: &openai.FunctionCall{
			Name:      chosenAction.Definition().Name.String(),
			Arguments: params.actionParams.String(),
		},
	})

	// result of calling the function
	a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{
		Role:       openai.ChatMessageRoleTool,
		Content:    result,
		Name:       chosenAction.Definition().Name.String(),
		ToolCallID: chosenAction.Definition().Name.String(),
	})

	//a.currentConversation = append(a.currentConversation, messages...)
	//a.currentConversation = messages

	// given the result, we can now ask OpenAI to complete the conversation or
	// to continue using another tool given the result
	followingAction, reasoning, err := a.pickAction(ctx, reEvalTemplate, a.currentConversation)
	if err != nil {
		job.Result.Finish(fmt.Errorf("error picking action: %w", err))
		return
	}

	if followingAction != nil &&
		!followingAction.Definition().Name.Is(action.ReplyActionName) &&
		!chosenAction.Definition().Name.Is(action.ReplyActionName) {
		// We need to do another action (?)
		// The agent decided to do another action
		// call ourselves again
		a.currentReasoning = reasoning
		a.nextAction = followingAction
		job.Text = ""
		a.consumeJob(job, role)
		return
	}

	// Generate a human-readable response
	resp, err := a.client.CreateChatCompletion(ctx,
		openai.ChatCompletionRequest{
			Model:    a.options.LLMAPI.Model,
			Messages: a.currentConversation,
		},
	)

	if err != nil {
		job.Result.Finish(err)
		return
	}

	if len(resp.Choices) != 1 {
		job.Result.Finish(fmt.Errorf("not enough choices in the completion response"))
		return
	}

	// display OpenAI's response to the original question utilizing our function
	msg := resp.Choices[0].Message

	a.currentConversation = append(a.currentConversation, msg)
	job.Result.Finish(nil)
}

func (a *Agent) periodicallyRun() {
	a.consumeJob(NewJob(WithText("What should I do next?")), SystemRole)
	// TODO: decide to do something on its own with the conversation result
	// before clearing it out

	// Clear the conversation
	// a.currentConversation = []openai.ChatCompletionMessage{}
}

func (a *Agent) Run() error {
	// The agent run does two things:
	// picks up requests from a queue
@@ -105,33 +301,23 @@ func (a *Agent) Run() error {

	// Expose a REST API to interact with the agent to ask it things

	clearConvTimer := time.NewTicker(1 * time.Minute)
	todoTimer := time.NewTicker(1 * time.Minute)
	for {
		select {
		case job := <-a.selfJobQueue:

			// XXX: is it needed?
			a.consumeJob(job, SystemRole)
		case job := <-a.jobQueue:

			// Consume the job and generate a response
			// TODO: Give a short-term memory to the agent
			a.consumeJob(job)
			a.consumeJob(job, UserRole)
		case <-a.context.Done():
			// Agent has been canceled, return error
			return ErrContextCanceled
		case <-clearConvTimer.C:
			// TODO: decide to do something on its own with the conversation result
			// before clearing it out

			// Clear the conversation
			// a.currentConversation = []openai.ChatCompletionMessage{}
		case <-todoTimer.C:
			a.periodicallyRun()
		}
	}
}

// StopAction stops the current action
// if any. Can be called before adding a new job.
func (a *Agent) StopAction() {
	a.Lock()
	defer a.Unlock()
	if a.actionContext != nil {
		a.actionContext.Cancel()
	}
}
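
For completeness, a hedged end-to-end sketch of how these pieces fit together. Only New, Run, Stop, Ask and WithText appear in this diff; runAgentExample is a hypothetical name, and the constructor options are deliberately left out because their names are not shown here.

import "log"

// runAgentExample is a hypothetical wiring example, assumed to live in the
// same package as Agent.
func runAgentExample() error {
	// Construct the agent. Real option constructors are not shown in this
	// diff, so none are passed here.
	a, err := New()
	if err != nil {
		return err
	}

	// Run drains jobQueue (caller jobs) and selfJobQueue (internal jobs),
	// and fires the periodic timers until the agent context is canceled.
	go func() {
		if err := a.Run(); err != nil {
			log.Println("agent stopped:", err)
		}
	}()

	// Blocking request from the caller side; consumed with UserRole.
	states := a.Ask(WithText("Introduce yourself"))
	log.Printf("agent finished the job with %d action states", len(states))

	// Cancel the agent's root context; Run returns ErrContextCanceled.
	a.Stop()
	return nil
}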