From 79e5dffe093ee94d52b03284eb4594decf226392 Mon Sep 17 00:00:00 2001
From: mudler <mudler@localai.io>
Date: Thu, 4 Apr 2024 20:00:58 +0200
Subject: [PATCH] wip

---
 agent/actions.go    | 103 ++++++++++++++++++++++----------------------
 agent/agent.go      |  99 +++++++++++++++++++-----------------------
 agent/agent_test.go |  27 ++++++++++++
 agent/templates.go  |  65 ++++++++++++++++++++++++++++
 4 files changed, 187 insertions(+), 107 deletions(-)
 create mode 100644 agent/templates.go

diff --git a/agent/actions.go b/agent/actions.go
index 65aa40e..3d170ef 100644
--- a/agent/actions.go
+++ b/agent/actions.go
@@ -85,19 +85,28 @@ func (a *Agent) decision(
 	return &decisionResult{actionParams: params}, nil
 }
 
-func (a *Agent) generateParameters(ctx context.Context, action Action, conversation []openai.ChatCompletionMessage) (*decisionResult, error) {
+func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act Action, c []openai.ChatCompletionMessage, reasoning string) (*decisionResult, error) {
+	conversation, _, _, err := a.prepareConversationParse(pickTemplate, c, false, reasoning)
+	if err != nil {
+		return nil, err
+	}
+
 	return a.decision(ctx,
 		conversation,
 		a.systemActions().ToTools(),
-		action.Definition().Name)
+		act.Definition().Name)
+}
+
+// systemInternalActions returns the user actions, followed by action.NewState() when the HUD is enabled.
+func (a *Agent) systemInternalActions() Actions {
+	if a.options.enableHUD {
+		return append(a.options.userActions, action.NewState())
+	}
+	return a.options.userActions
 }
 
 func (a *Agent) systemActions() Actions {
-	if a.options.enableHUD {
-		return append(a.options.userActions, action.NewReply(), action.NewState())
-	}
-
-	return append(a.options.userActions, action.NewReply())
+	return append(a.systemInternalActions(), action.NewReply())
 }
 
 func (a *Agent) prepareHUD() PromptHUD {
@@ -108,83 +117,73 @@ func (a *Agent) prepareHUD() PromptHUD {
 	}
 }
 
-const hudTemplate = `You have a character and your replies and actions might be influenced by it.
-{{if .Character.Name}}Name: {{.Character.Name}}
-{{end}}{{if .Character.Age}}Age: {{.Character.Age}}
-{{end}}{{if .Character.Occupation}}Occupation: {{.Character.Occupation}}
-{{end}}{{if .Character.Hobbies}}Hobbies: {{.Character.Hobbies}}
-{{end}}{{if .Character.MusicTaste}}Music taste: {{.Character.MusicTaste}}
-{{end}}
-
-This is your current state:
-NowDoing: {{if .CurrentState.NowDoing}}{{.CurrentState.NowDoing}}{{else}}Nothing{{end}}
-DoingNext: {{if .CurrentState.DoingNext}}{{.CurrentState.DoingNext}}{{else}}Nothing{{end}}
-Your permanent goal is: {{if .PermanentGoal}}{{.PermanentGoal}}{{else}}Nothing{{end}}
-Your current goal is: {{if .CurrentState.Goal}}{{.CurrentState.Goal}}{{else}}Nothing{{end}}
-You have done: {{range .CurrentState.DoneHistory}}{{.}} {{end}}
-You have a short memory with: {{range .CurrentState.Memories}}{{.}} {{end}}`
-
-// pickAction picks an action based on the conversation
-func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.ChatCompletionMessage) (Action, string, error) {
+func (a *Agent) prepareConversationParse(templ string, messages []openai.ChatCompletionMessage, canReply bool, reasoning string) ([]openai.ChatCompletionMessage, Actions, []string, error) {
 	// prepare the prompt
 	prompt := bytes.NewBuffer([]byte{})
-	hud := bytes.NewBuffer([]byte{})
 
 	promptTemplate, err := template.New("pickAction").Parse(templ)
 	if err != nil {
-		return nil, "", err
+		return nil, []Action{}, nil, err
 	}
-	hudTmpl, err := template.New("HUD").Parse(hudTemplate)
-	if err != nil {
-		return nil, "", err
+
+	actions := a.systemActions()
+	if !canReply {
+		actions = a.systemInternalActions()
 	}
+
 	// Get all the actions definitions
 	definitions := []action.ActionDefinition{}
-	for _, m := range a.systemActions() {
+	for _, m := range actions {
 		definitions = append(definitions, m.Definition())
 	}
 
-	err = promptTemplate.Execute(prompt, struct {
-		Actions  []action.ActionDefinition
-		Messages []openai.ChatCompletionMessage
-	}{
-		Actions:  definitions,
-		Messages: messages,
-	})
-	if err != nil {
-		return nil, "", err
+	var promptHUD *PromptHUD
+	if a.options.enableHUD {
+		h := a.prepareHUD()
+		promptHUD = &h
 	}
 
-	err = hudTmpl.Execute(hud, a.prepareHUD())
+	err = promptTemplate.Execute(prompt, struct {
+		HUD       *PromptHUD
+		Actions   []action.ActionDefinition
+		Reasoning string
+		Messages  []openai.ChatCompletionMessage
+	}{
+		Actions:   definitions,
+		Reasoning: reasoning,
+		Messages:  messages,
+		HUD:       promptHUD,
+	})
 	if err != nil {
-		return nil, "", err
+		return nil, []Action{}, nil, err
 	}
 
 	if a.options.debugMode {
-		fmt.Println("=== HUD START ===", hud.String(), "=== HUD END ===")
 		fmt.Println("=== PROMPT START ===", prompt.String(), "=== PROMPT END ===")
 	}
 
 	// Get all the available actions IDs
 	actionsID := []string{}
-	for _, m := range a.systemActions() {
+	for _, m := range actions {
 		actionsID = append(actionsID, m.Definition().Name.String())
 	}
 
 	conversation := []openai.ChatCompletionMessage{}
 
-	if a.options.enableHUD {
-		conversation = append(conversation, openai.ChatCompletionMessage{
-			Role:    "system",
-			Content: hud.String(),
-		})
-	}
-
 	conversation = append(conversation, openai.ChatCompletionMessage{
 		Role:    "user",
 		Content: prompt.String(),
 	})
 
+	return conversation, actions, actionsID, nil
+}
+
+// pickAction picks an action based on the conversation
+func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.ChatCompletionMessage, canReply bool) (Action, string, error) {
+	conversation, actions, actionsID, err := a.prepareConversationParse(templ, messages, canReply, "")
+	if err != nil {
+		return nil, "", err
+	}
 	// Get the LLM to think on what to do
 	thought, err := a.decision(ctx,
 		conversation,
@@ -234,7 +233,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 	}
 
 	// Find the action
-	chosenAction := a.systemActions().Find(actionChoice.Tool)
+	chosenAction := actions.Find(actionChoice.Tool)
 	if chosenAction == nil {
 		return nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
 	}
diff --git a/agent/agent.go b/agent/agent.go
index c551163..5a72715 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"os"
-	"strings"
 	"sync"
 	"time"
 
@@ -13,33 +12,6 @@ import (
 	"github.com/sashabaranov/go-openai"
 )
 
-const pickActionTemplate = `You can take any of the following tools: 
-
-{{range .Actions -}}
-- {{.Name}}: {{.Description }}
-{{ end }}
-To answer back to the user, use the "reply" tool.
-Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.
-
-{{range .Messages -}}
-{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
-{{end}}
-`
-
-const reEvalTemplate = `You can take any of the following tools: 
-
-{{range .Actions -}}
-- {{.Name}}: {{.Description }}
-{{ end }}
-To answer back to the user, use the "reply" tool.
-Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.
-
-{{range .Messages -}}
-{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
-{{end}}
-
-We already have called tools. Evaluate the current situation and decide if we need to execute other tools or answer back with a result.`
-
 const (
 	UserRole      = "user"
 	AssistantRole = "assistant"
@@ -202,6 +174,7 @@ func (a *Agent) runAction(chosenAction Action, decisionResult *decisionResult) (
 }
 
 func (a *Agent) consumeJob(job *Job, role string) {
+	selfEvaluation := role == SystemRole
 	// Consume the job and generate a response
 	a.Lock()
 	// Set the action context
@@ -221,6 +194,17 @@ func (a *Agent) consumeJob(job *Job, role string) {
 		})
 	}
 
+	var pickTemplate string
+	var reEvaluationTemplate string
+
+	if selfEvaluation {
+		pickTemplate = pickSelfTemplate
+		reEvaluationTemplate = reSelfEvalTemplate
+	} else {
+		pickTemplate = pickActionTemplate
+		reEvaluationTemplate = reEvalTemplate
+	}
+
 	// choose an action first
 	var chosenAction Action
 	var reasoning string
@@ -234,7 +218,7 @@ func (a *Agent) consumeJob(job *Job, role string) {
 		a.nextAction = nil
 	} else {
 		var err error
-		chosenAction, reasoning, err = a.pickAction(ctx, pickActionTemplate, a.currentConversation)
+		chosenAction, reasoning, err = a.pickAction(ctx, pickTemplate, a.currentConversation, true)
 		if err != nil {
 			job.Result.Finish(err)
 			return
@@ -247,7 +231,7 @@ func (a *Agent) consumeJob(job *Job, role string) {
 		return
 	}
 
-	params, err := a.generateParameters(ctx, chosenAction, a.currentConversation)
+	params, err := a.generateParameters(ctx, pickTemplate, chosenAction, a.currentConversation, reasoning)
 	if err != nil {
 		job.Result.Finish(fmt.Errorf("error generating action's parameters: %w", err))
 		return
@@ -298,7 +282,7 @@ func (a *Agent) consumeJob(job *Job, role string) {
 
 		// given the result, we can now ask OpenAI to complete the conversation or
 		// to continue using another tool given the result
-		followingAction, reasoning, err := a.pickAction(ctx, reEvalTemplate, a.currentConversation)
+		followingAction, reasoning, err := a.pickAction(ctx, reEvaluationTemplate, a.currentConversation, !selfEvaluation)
 		if err != nil {
 			job.Result.Finish(fmt.Errorf("error picking action: %w", err))
 			return
@@ -344,43 +328,48 @@ func (a *Agent) consumeJob(job *Job, role string) {
 }
 
 func (a *Agent) periodicallyRun() {
-	// Here the LLM could decide to store some part of the conversation too in the memory
-	evaluateMemory := NewJob(
-		WithText(
-			`Evaluate the current conversation and decide if we need to store some relevant informations from it`,
-		))
-	a.consumeJob(evaluateMemory, SystemRole)
+	if len(a.CurrentConversation()) != 0 {
+		// Here the LLM could decide to store some part of the conversation too in the memory
+		evaluateMemory := NewJob(
+			WithText(
+				`Evaluate the current conversation and decide if we need to store some relevant informations from it`,
+			))
+		a.consumeJob(evaluateMemory, SystemRole)
 
-	a.ResetConversation()
+		a.ResetConversation()
+	}
 
 	// Here we go in a loop of
 	// - asking the agent to do something
 	// - evaluating the result
 	// - asking the agent to do something else based on the result
 
-	whatNext := NewJob(WithText("What should I do next?"))
+	//	whatNext := NewJob(WithText("Decide what to do based on the state"))
+	whatNext := NewJob(WithText("Decide what to based on the goal and the persistent goal."))
 	a.consumeJob(whatNext, SystemRole)
 
-	doWork := NewJob(WithText("Try to fullfill our goals automatically"))
-	a.consumeJob(doWork, SystemRole)
+	// a.ResetConversation()
 
-	results := []string{}
-	for _, v := range doWork.Result.State {
-		results = append(results, v.Result)
-	}
+	// doWork := NewJob(WithText("Select the tool to use based on your goal and the current state."))
+	// a.consumeJob(doWork, SystemRole)
 
-	a.ResetConversation()
+	// results := []string{}
+	// for _, v := range doWork.Result.State {
+	// 	results = append(results, v.Result)
+	// }
 
-	// Here the LLM could decide to do something based on the result of our automatic action
-	evaluateAction := NewJob(
-		WithText(
-			`Evaluate the current situation and decide if we need to execute other tools (for instance to store results into permanent, or short memory).
-			We have done the following actions:
-			` + strings.Join(results, "\n"),
-		))
-	a.consumeJob(evaluateAction, SystemRole)
+	// a.ResetConversation()
 
-	a.ResetConversation()
+	// // Here the LLM could decide to do something based on the result of our automatic action
+	// evaluateAction := NewJob(
+	// 	WithText(
+	// 		`Evaluate the current situation and decide if we need to execute other tools (for instance to store results into permanent, or short memory).
+	// 		We have done the following actions:
+	// 		` + strings.Join(results, "\n"),
+	// 	))
+	// a.consumeJob(evaluateAction, SystemRole)
+
+	// a.ResetConversation()
 }
 
 func (a *Agent) Run() error {
diff --git a/agent/agent_test.go b/agent/agent_test.go
index b532109..bc41ff3 100644
--- a/agent/agent_test.go
+++ b/agent/agent_test.go
@@ -147,5 +147,32 @@ var _ = Describe("Agent test", func() {
 			Expect(result.Error).ToNot(HaveOccurred())
 			Expect(agent.State().Goal).To(ContainSubstring("guitar"), fmt.Sprint(agent.State()))
 		})
+
+		// Standalone-loop spec: no job is submitted through Ask; the spec only
+		// polls the agent state until NowDoing mentions the permanent goal's topic.
+		// Declared with It rather than FIt: a focused spec makes Ginkgo skip every
+		// other spec in the suite and exit non-zero even when the run passes.
+		It("it automatically performs things in the background", func() {
+			agent, err := New(
+				WithLLMAPIURL(apiModel),
+				WithModel(testModel),
+				EnableHUD,
+				DebugMode,
+				EnableStandaloneJob,
+				WithRandomIdentity(),
+				WithPermanentGoal("get the weather of all the cities in italy"),
+			)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Start the agent in the background and stop it when the spec returns.
+			go agent.Run()
+			defer agent.Stop()
+
+			// Poll every 10s for up to 4m, printing each observed state.
+			Eventually(func() string {
+				fmt.Println(agent.State())
+				return agent.State().NowDoing
+			}, "4m", "10s").Should(ContainSubstring("weather"), fmt.Sprint(agent.State()))
+		})
 	})
 })
diff --git a/agent/templates.go b/agent/templates.go
new file mode 100644
index 0000000..977fc34
--- /dev/null
+++ b/agent/templates.go
@@ -0,0 +1,65 @@
+package agent
+
+const hud = `{{with .HUD }}You have a character and your replies and actions might be influenced by it.
+{{if .Character.Name}}Name: {{.Character.Name}}
+{{end}}{{if .Character.Age}}Age: {{.Character.Age}}
+{{end}}{{if .Character.Occupation}}Occupation: {{.Character.Occupation}}
+{{end}}{{if .Character.Hobbies}}Hobbies: {{.Character.Hobbies}}
+{{end}}{{if .Character.MusicTaste}}Music taste: {{.Character.MusicTaste}}
+{{end}}
+
+This is your current state:
+NowDoing: {{if .CurrentState.NowDoing}}{{.CurrentState.NowDoing}}{{else}}Nothing{{end}}
+DoingNext: {{if .CurrentState.DoingNext}}{{.CurrentState.DoingNext}}{{else}}Nothing{{end}}
+Your permanent goal is: {{if .PermanentGoal}}{{.PermanentGoal}}{{else}}Nothing{{end}}
+Your current goal is: {{if .CurrentState.Goal}}{{.CurrentState.Goal}}{{else}}Nothing{{end}}
+You have done: {{range .CurrentState.DoneHistory}}{{.}} {{end}}
+You have a short memory with: {{range .CurrentState.Memories}}{{.}} {{end}}{{end}}`
+// pickSelfTemplate is the action-picking prompt used for self-evaluation (SystemRole jobs); the HUD block is appended after it.
+const pickSelfTemplate = `
+You can take any of the following tools: 
+
+{{range .Actions -}}
+- {{.Name}}: {{.Description }}
+{{ end }}
+
+{{if .Messages}}
+Consider the text below, decide which action to take and explain the detailed reasoning behind it.
+
+{{range .Messages -}}
+{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Function.Name}} called with {{.Function.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
+{{end}}
+{{end}}
+
+Act like a smart AI agent having a character, the character and your state is defined in the message above.
+You are now self-evaluating what to do next based on the state in the previous message. 
+For example, if the permanent goal is to "make a sandwich", you might want to "get the bread" first, and update the state afterwards by calling two tools in sequence.
+You can update the short-term goal, the current action, the next action, the history of actions, and the memories.
+You can't ask things to the user as you are thinking by yourself.
+
+{{if .Reasoning}}Reasoning: {{.Reasoning}}{{end}}
+` + hud
+
+const reSelfEvalTemplate = pickSelfTemplate + `
+
+We already have called other tools. Evaluate the current situation and decide if we need to execute other tools.`
+// pickActionTemplate is the action-picking prompt for non-SystemRole jobs: HUD block, tool list, conversation, optional reasoning.
+const pickActionTemplate = hud + `
+You can take any of the following tools: 
+
+{{range .Actions -}}
+- {{.Name}}: {{.Description }}
+{{ end }}
+To answer back to the user, use the "reply" tool.
+Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'.
+
+{{range .Messages -}}
+{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Function.Name}} called with {{.Function.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}}
+{{end}}
+
+{{if .Reasoning}}Reasoning: {{.Reasoning}}{{end}}
+`
+
+const reEvalTemplate = pickActionTemplate + `
+
+We already have called other tools. Evaluate the current situation and decide if we need to execute other tools or answer back with a result.`