From e6090c62cfd282ad36dab2cc626782c001eb991d Mon Sep 17 00:00:00 2001 From: mudler Date: Wed, 3 Apr 2024 18:04:50 +0200 Subject: [PATCH] Split character from state --- agent/agent.go | 238 +++++++++++++++++++++++++++++++++++++++----- agent/agent_test.go | 29 ++++-- agent/jobs.go | 174 -------------------------------- agent/options.go | 14 +-- agent/state.go | 42 ++++---- agent/state_test.go | 12 --- llm/json.go | 1 - 7 files changed, 261 insertions(+), 249 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index b1d8097..f99e006 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -11,15 +11,48 @@ import ( "github.com/sashabaranov/go-openai" ) +const pickActionTemplate = `You can take any of the following tools: + +{{range .Actions -}} +- {{.Name}}: {{.Description }} +{{ end }} +To answer back to the user, use the "reply" tool. +Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'. + +{{range .Messages -}} +{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}} +{{end}} +` + +const reEvalTemplate = `You can take any of the following tools: + +{{range .Actions -}} +- {{.Name}}: {{.Description }} +{{ end }} +To answer back to the user, use the "reply" tool. +Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'. + +{{range .Messages -}} +{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}} +{{end}} + +We already have called tools. Evaluate the current situation and decide if we need to execute other tools or answer back with a result.` + +const ( + UserRole = "user" + AssistantRole = "assistant" + SystemRole = "system" +) + type Agent struct { sync.Mutex - options *options - Character Character - client *openai.Client - jobQueue chan *Job - actionContext *action.ActionContext - context *action.ActionContext - availableActions []Action + options *options + Character Character + client *openai.Client + jobQueue, selfJobQueue chan *Job + actionContext *action.ActionContext + context *action.ActionContext + availableActions []Action currentReasoning string nextAction Action @@ -45,6 +78,7 @@ func New(opts ...Option) (*Agent, error) { ctx, cancel := context.WithCancel(c) a := &Agent{ jobQueue: make(chan *Job), + selfJobQueue: make(chan *Job), options: options, client: client, Character: options.character, @@ -61,10 +95,20 @@ func New(opts ...Option) (*Agent, error) { return a, nil } +// StopAction stops the current action +// if any. Can be called before adding a new job. +func (a *Agent) StopAction() { + a.Lock() + defer a.Unlock() + if a.actionContext != nil { + a.actionContext.Cancel() + } +} + // Ask is a pre-emptive, blocking call that returns the response as soon as it's ready. // It discards any other computation. func (a *Agent) Ask(opts ...JobOption) []ActionState { - //a.StopAction() + a.StopAction() j := NewJob(opts...) // fmt.Println("Job created", text) a.jobQueue <- j @@ -92,6 +136,158 @@ func (a *Agent) Stop() { a.context.Cancel() } +func (a *Agent) consumeJob(job *Job, role string) { + // Consume the job and generate a response + a.Lock() + // Set the action context + ctx, cancel := context.WithCancel(context.Background()) + a.actionContext = action.NewContext(ctx, cancel) + a.Unlock() + + if job.Image != "" { + // TODO: Use llava to explain the image content + + } + + if job.Text != "" { + a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{ + Role: role, + Content: job.Text, + }) + } + + // choose an action first + var chosenAction Action + var reasoning string + + if a.currentReasoning != "" && a.nextAction != nil { + // if we are being re-evaluated, we already have the action + // and the reasoning. Consume it here and reset it + chosenAction = a.nextAction + reasoning = a.currentReasoning + a.currentReasoning = "" + a.nextAction = nil + } else { + var err error + chosenAction, reasoning, err = a.pickAction(ctx, pickActionTemplate, a.currentConversation) + if err != nil { + job.Result.Finish(err) + return + } + } + + if chosenAction == nil || chosenAction.Definition().Name.Is(action.ReplyActionName) { + job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""}) + job.Result.Finish(nil) + return + } + + params, err := a.generateParameters(ctx, chosenAction, a.currentConversation) + if err != nil { + job.Result.Finish(err) + return + } + + if !job.Callback(ActionCurrentState{chosenAction, params.actionParams, reasoning}) { + job.Result.SetResult(ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, "stopped by callback"}) + job.Result.Finish(nil) + return + } + + if params.actionParams == nil { + job.Result.Finish(fmt.Errorf("no parameters")) + return + } + + var result string + for _, action := range a.options.actions { + if action.Definition().Name == chosenAction.Definition().Name { + if result, err = action.Run(params.actionParams); err != nil { + job.Result.Finish(fmt.Errorf("error running action: %w", err)) + return + } + } + } + + stateResult := ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, result} + job.Result.SetResult(stateResult) + job.CallbackWithResult(stateResult) + + // calling the function + a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{ + Role: "assistant", + FunctionCall: &openai.FunctionCall{ + Name: chosenAction.Definition().Name.String(), + Arguments: params.actionParams.String(), + }, + }) + + // result of calling the function + a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleTool, + Content: result, + Name: chosenAction.Definition().Name.String(), + ToolCallID: chosenAction.Definition().Name.String(), + }) + + //a.currentConversation = append(a.currentConversation, messages...) + //a.currentConversation = messages + + // given the result, we can now ask OpenAI to complete the conversation or + // to continue using another tool given the result + followingAction, reasoning, err := a.pickAction(ctx, reEvalTemplate, a.currentConversation) + if err != nil { + job.Result.Finish(fmt.Errorf("error picking action: %w", err)) + return + } + + if followingAction != nil && + !followingAction.Definition().Name.Is(action.ReplyActionName) && + !chosenAction.Definition().Name.Is(action.ReplyActionName) { + // We need to do another action (?) + // The agent decided to do another action + // call ourselves again + a.currentReasoning = reasoning + a.nextAction = followingAction + job.Text = "" + a.consumeJob(job, role) + return + } + + // Generate a human-readable response + resp, err := a.client.CreateChatCompletion(ctx, + openai.ChatCompletionRequest{ + Model: a.options.LLMAPI.Model, + Messages: a.currentConversation, + }, + ) + + if err != nil { + job.Result.Finish(err) + return + } + + if len(resp.Choices) != 1 { + job.Result.Finish(fmt.Errorf("no enough choices: %w", err)) + return + } + + // display OpenAI's response to the original question utilizing our function + msg := resp.Choices[0].Message + + a.currentConversation = append(a.currentConversation, msg) + job.Result.Finish(nil) +} + +func (a *Agent) periodicallyRun() { + a.consumeJob(NewJob(WithText("What should I do next?")), SystemRole) + // TODO: decide to do something on its own with the conversation result + // before clearing it out + + // Clear the conversation + // a.currentConversation = []openai.ChatCompletionMessage{} +} + func (a *Agent) Run() error { // The agent run does two things: // picks up requests from a queue @@ -105,33 +301,23 @@ func (a *Agent) Run() error { // Expose a REST API to interact with the agent to ask it things - clearConvTimer := time.NewTicker(1 * time.Minute) + todoTimer := time.NewTicker(1 * time.Minute) for { select { + case job := <-a.selfJobQueue: + + // XXX: is it needed? + a.consumeJob(job, SystemRole) case job := <-a.jobQueue: // Consume the job and generate a response // TODO: Give a short-term memory to the agent - a.consumeJob(job) + a.consumeJob(job, UserRole) case <-a.context.Done(): // Agent has been canceled, return error return ErrContextCanceled - case <-clearConvTimer.C: - // TODO: decide to do something on its own with the conversation result - // before clearing it out - - // Clear the conversation - // a.currentConversation = []openai.ChatCompletionMessage{} + case <-todoTimer.C: + a.periodicallyRun() } } } - -// StopAction stops the current action -// if any. Can be called before adding a new job. -func (a *Agent) StopAction() { - a.Lock() - defer a.Unlock() - if a.actionContext != nil { - a.actionContext.Cancel() - } -} diff --git a/agent/agent_test.go b/agent/agent_test.go index b52d4a2..10c83da 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -16,6 +16,18 @@ const testActionResult3 = "In paris it's very cold today, it is 2C and the humid var _ Action = &TestAction{} +var debugOptions = []JobOption{ + WithReasoningCallback(func(state ActionCurrentState) bool { + fmt.Println("Reasoning", state) + return true + }), + WithResultCallback(func(state ActionState) { + fmt.Println("Reasoning", state.Reasoning) + fmt.Println("Action", state.Action) + fmt.Println("Result", state.Result) + }), +} + type TestAction struct { response []string responseN int @@ -64,11 +76,9 @@ var _ = Describe("Agent test", func() { go agent.Run() defer agent.Stop() res := agent.Ask( - WithReasoningCallback(func(state ActionCurrentState) bool { - fmt.Println("Reasoning", state) - return true - }), - WithText("can you get the weather in boston, and afterward of Milano, Italy?"), + append(debugOptions, + WithText("can you get the weather in boston, and afterward of Milano, Italy?"), + )..., ) reasons := []string{} for _, r := range res { @@ -78,7 +88,10 @@ var _ = Describe("Agent test", func() { Expect(reasons).To(ContainElement(testActionResult2), fmt.Sprint(res)) reasons = []string{} - res = agent.Ask(WithText("Now I want to know the weather in Paris")) + res = agent.Ask( + append(debugOptions, + WithText("Now I want to know the weather in Paris"), + )...) conversation := agent.CurrentConversation() Expect(len(conversation)).To(Equal(10), fmt.Sprint(conversation)) for _, r := range res { @@ -93,6 +106,7 @@ var _ = Describe("Agent test", func() { agent, err := New( WithLLMAPIURL(apiModel), WithModel(testModel), + // WithRandomIdentity(), WithActions(&TestAction{response: []string{testActionResult}}), ) @@ -100,7 +114,8 @@ var _ = Describe("Agent test", func() { go agent.Run() defer agent.Stop() res := agent.Ask( - WithText("can you get the weather in boston?"), + append(debugOptions, + WithText("can you get the weather in boston?"))..., ) reasons := []string{} for _, r := range res { diff --git a/agent/jobs.go b/agent/jobs.go index 08a9b3b..8de6772 100644 --- a/agent/jobs.go +++ b/agent/jobs.go @@ -1,12 +1,7 @@ package agent import ( - "context" - "fmt" "sync" - - "github.com/mudler/local-agent-framework/action" - "github.com/sashabaranov/go-openai" ) // Job is a request to the agent to do something @@ -116,172 +111,3 @@ func (j *JobResult) WaitResult() []ActionState { defer j.Unlock() return j.State } - -const pickActionTemplate = `You can take any of the following tools: - -{{range .Actions -}} -- {{.Name}}: {{.Description }} -{{ end }} -To answer back to the user, use the "reply" tool. -Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'. - -{{range .Messages -}} -{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}} -{{end}} -` - -const reEvalTemplate = `You can take any of the following tools: - -{{range .Actions -}} -- {{.Name}}: {{.Description }} -{{ end }} -To answer back to the user, use the "reply" tool. -Given the text below, decide which action to take and explain the detailed reasoning behind it. For answering without picking a choice, reply with 'none'. - -{{range .Messages -}} -{{.Role}}{{if .FunctionCall}}(tool_call){{.FunctionCall}}{{end}}: {{if .FunctionCall}}{{.FunctionCall}}{{else if .ToolCalls -}}{{range .ToolCalls -}}{{.Name}} called with {{.Arguments}}{{end}}{{ else }}{{.Content -}}{{end}} -{{end}} - -We already have called tools. Evaluate the current situation and decide if we need to execute other tools or answer back with a result.` - -func (a *Agent) consumeJob(job *Job) { - // Consume the job and generate a response - a.Lock() - // Set the action context - ctx, cancel := context.WithCancel(context.Background()) - a.actionContext = action.NewContext(ctx, cancel) - a.Unlock() - - if job.Image != "" { - // TODO: Use llava to explain the image content - - } - - if job.Text != "" { - a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{ - Role: "user", - Content: job.Text, - }) - } - - // choose an action first - var chosenAction Action - var reasoning string - - if a.currentReasoning != "" && a.nextAction != nil { - // if we are being re-evaluated, we already have the action - // and the reasoning. Consume it here and reset it - chosenAction = a.nextAction - reasoning = a.currentReasoning - a.currentReasoning = "" - a.nextAction = nil - } else { - var err error - chosenAction, reasoning, err = a.pickAction(ctx, pickActionTemplate, a.currentConversation) - if err != nil { - fmt.Printf("error picking action: %v\n", err) - return - } - } - - if chosenAction == nil || chosenAction.Definition().Name.Is(action.ReplyActionName) { - job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""}) - return - } - - params, err := a.generateParameters(ctx, chosenAction, a.currentConversation) - if err != nil { - fmt.Printf("error generating parameters: %v\n", err) - return - } - - if !job.Callback(ActionCurrentState{chosenAction, params.actionParams, reasoning}) { - fmt.Println("Stop from callback") - job.Result.SetResult(ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, "stopped by callback"}) - return - } - - if params.actionParams == nil { - fmt.Println("no parameters") - return - } - - var result string - for _, action := range a.options.actions { - fmt.Println("Checking action: ", action.Definition().Name, chosenAction.Definition().Name) - if action.Definition().Name == chosenAction.Definition().Name { - fmt.Printf("Running action: %v\n", action.Definition().Name) - fmt.Printf("With parameters: %v\n", params.actionParams) - if result, err = action.Run(params.actionParams); err != nil { - fmt.Printf("error running action: %v\n", err) - return - } - } - } - fmt.Printf("Action run result: %v\n", result) - stateResult := ActionState{ActionCurrentState{chosenAction, params.actionParams, reasoning}, result} - job.Result.SetResult(stateResult) - job.CallbackWithResult(stateResult) - - // calling the function - a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{ - Role: "assistant", - FunctionCall: &openai.FunctionCall{ - Name: chosenAction.Definition().Name.String(), - Arguments: params.actionParams.String(), - }, - }) - - // result of calling the function - a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleTool, - Content: result, - Name: chosenAction.Definition().Name.String(), - ToolCallID: chosenAction.Definition().Name.String(), - }) - - //a.currentConversation = append(a.currentConversation, messages...) - //a.currentConversation = messages - - // given the result, we can now ask OpenAI to complete the conversation or - // to continue using another tool given the result - followingAction, reasoning, err := a.pickAction(ctx, reEvalTemplate, a.currentConversation) - if err != nil { - fmt.Printf("error picking action: %v\n", err) - return - } - - if followingAction == nil || followingAction.Definition().Name.Is(action.ReplyActionName) { - fmt.Println("No action to do, just reply") - } else if !chosenAction.Definition().Name.Is(action.ReplyActionName) { - // We need to do another action (?) - // The agent decided to do another action - // call ourselves again - a.currentReasoning = reasoning - a.nextAction = followingAction - job.Text = "" - a.consumeJob(job) - return - } - - // Generate a human-readable response - resp, err := a.client.CreateChatCompletion(ctx, - openai.ChatCompletionRequest{ - Model: a.options.LLMAPI.Model, - Messages: a.currentConversation, - }, - ) - if err != nil || len(resp.Choices) != 1 { - fmt.Printf("2nd completion error: err:%v len(choices):%v\n", err, - len(resp.Choices)) - return - } - - // display OpenAI's response to the original question utilizing our function - msg := resp.Choices[0].Message - fmt.Printf("OpenAI answered the original request with: %v\n", - msg.Content) - - a.currentConversation = append(a.currentConversation, msg) - job.Result.Finish(nil) -} diff --git a/agent/options.go b/agent/options.go index bb25539..4cc3584 100644 --- a/agent/options.go +++ b/agent/options.go @@ -29,15 +29,11 @@ func defaultOptions() *options { Model: "echidna", }, character: Character{ - Name: "John Doe", - Age: 0, - Occupation: "Unemployed", - NowDoing: "Nothing", - DoingNext: "Nothing", - DoneHistory: []string{}, - Memories: []string{}, - Hobbies: []string{}, - MusicTaste: []string{}, + Name: "John Doe", + Age: 0, + Occupation: "Unemployed", + Hobbies: []string{}, + MusicTaste: []string{}, }, } } diff --git a/agent/state.go b/agent/state.go index 2a2d279..2456e6f 100644 --- a/agent/state.go +++ b/agent/state.go @@ -12,20 +12,34 @@ import ( // all information that should be displayed to the LLM // in the prompts type PromptHUD struct { - Character Character `json:"character"` + Character Character `json:"character"` + CurrentState State `json:"current_state"` } -type Character struct { - Name string `json:"name"` - Age int `json:"age"` - Occupation string `json:"job_occupation"` +// State is the structure +// that is used to keep track of the current state +// and the Agent's short memory that it can update +// Besides a long term memory that is accessible by the agent (With vector database), +// And a context memory (that is always powered by a vector database), +// this memory is the shorter one that the LLM keeps across conversation and across its +// reasoning process's and life time. +// A special action is then used to let the LLM itself update its memory +// periodically during self-processing, and the same action is ALSO exposed +// during the conversation to let the user put for example, a new goal to the agent. +type State struct { NowDoing string `json:"doing_now"` DoingNext string `json:"doing_next"` DoneHistory []string `json:"done_history"` Memories []string `json:"memories"` - Hobbies []string `json:"hobbies"` - MusicTaste []string `json:"music_taste"` - Sex string `json:"sex"` +} + +type Character struct { + Name string `json:"name"` + Age int `json:"age"` + Occupation string `json:"job_occupation"` + Hobbies []string `json:"hobbies"` + MusicTaste []string `json:"music_taste"` + Sex string `json:"sex"` } func Load(path string) (*Character, error) { @@ -69,10 +83,6 @@ func (a *Agent) validCharacter() bool { return a.Character.Name != "" && a.Character.Age != 0 && a.Character.Occupation != "" && - a.Character.NowDoing != "" && - a.Character.DoingNext != "" && - len(a.Character.DoneHistory) != 0 && - len(a.Character.Memories) != 0 && len(a.Character.Hobbies) != 0 && len(a.Character.MusicTaste) != 0 } @@ -81,10 +91,6 @@ const fmtT = `===================== Name: %s Age: %d Occupation: %s -Now doing: %s -Doing next: %s -Done history: %v -Memories: %v Hobbies: %v Music taste: %v =====================` @@ -95,10 +101,6 @@ func (a *Agent) String() string { a.Character.Name, a.Character.Age, a.Character.Occupation, - a.Character.NowDoing, - a.Character.DoingNext, - a.Character.DoneHistory, - a.Character.Memories, a.Character.Hobbies, a.Character.MusicTaste, ) diff --git a/agent/state_test.go b/agent/state_test.go index d5d40e9..85ec864 100644 --- a/agent/state_test.go +++ b/agent/state_test.go @@ -1,8 +1,6 @@ package agent_test import ( - "fmt" - . "github.com/mudler/local-agent-framework/agent" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -21,13 +19,8 @@ var _ = Describe("Agent test", func() { Expect(agent.Character.Name).ToNot(BeEmpty()) Expect(agent.Character.Age).ToNot(BeZero()) Expect(agent.Character.Occupation).ToNot(BeEmpty()) - Expect(agent.Character.NowDoing).ToNot(BeEmpty()) - Expect(agent.Character.DoingNext).ToNot(BeEmpty()) - Expect(agent.Character.DoneHistory).ToNot(BeEmpty()) - Expect(agent.Character.Memories).ToNot(BeEmpty()) Expect(agent.Character.Hobbies).ToNot(BeEmpty()) Expect(agent.Character.MusicTaste).ToNot(BeEmpty()) - fmt.Println(agent.String()) }) It("detect an invalid character", func() { _, err := New(WithRandomIdentity()) @@ -43,13 +36,8 @@ var _ = Describe("Agent test", func() { Expect(agent.Character.Name).ToNot(BeEmpty()) Expect(agent.Character.Age).ToNot(BeZero()) Expect(agent.Character.Occupation).ToNot(BeEmpty()) - Expect(agent.Character.NowDoing).ToNot(BeEmpty()) - Expect(agent.Character.DoingNext).ToNot(BeEmpty()) - Expect(agent.Character.DoneHistory).ToNot(BeEmpty()) - Expect(agent.Character.Memories).ToNot(BeEmpty()) Expect(agent.Character.Hobbies).ToNot(BeEmpty()) Expect(agent.Character.MusicTaste).ToNot(BeEmpty()) - fmt.Println(agent.String()) }) }) }) diff --git a/llm/json.go b/llm/json.go index ded3a51..80a87f2 100644 --- a/llm/json.go +++ b/llm/json.go @@ -30,7 +30,6 @@ func GenerateJSON(ctx context.Context, client *openai.Client, model, text string return fmt.Errorf("no response from OpenAI API") } - fmt.Println(resp.Choices[0].Message.Content) err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), i) if err != nil { return err