From fd685e1855945c918708f67ab3aad8b3f14cf25c Mon Sep 17 00:00:00 2001
From: sahari
Date: Sat, 9 Mar 2024 21:38:50 -0800
Subject: [PATCH] feat: add air for development and start on support for
 claude v3

---
 .air.toml                            | 46 +++++++++++++++++++++
 .gitignore                           |  2 +
 internal/bedrock/bedrock.go          | 20 +++++++++
 internal/bedrock/models/anthropic.go | 62 +++++++++++++++++++++++-----
 internal/bedrock/models/cohere.go    | 24 +++++------
 5 files changed, 132 insertions(+), 22 deletions(-)
 create mode 100644 .air.toml
 create mode 100644 .gitignore

diff --git a/.air.toml b/.air.toml
new file mode 100644
index 0000000..7442b9d
--- /dev/null
+++ b/.air.toml
@@ -0,0 +1,46 @@
+root = "."
+testdata_dir = "testdata"
+tmp_dir = "tmp"
+
+[build]
+  args_bin = ["server"]
+  bin = "./tmp/main"
+  cmd = "go build -o ./tmp/main ."
+  delay = 1000
+  exclude_dir = ["assets", "tmp", "vendor", "testdata"]
+  exclude_file = []
+  exclude_regex = ["_test.go"]
+  exclude_unchanged = false
+  follow_symlink = false
+  full_bin = ""
+  include_dir = []
+  include_ext = ["go", "tpl", "tmpl", "html"]
+  include_file = []
+  kill_delay = "0s"
+  log = "build-errors.log"
+  poll = false
+  poll_interval = 0
+  post_cmd = []
+  pre_cmd = []
+  rerun = false
+  rerun_delay = 500
+  send_interrupt = false
+  stop_on_error = false
+
+[color]
+  app = ""
+  build = "yellow"
+  main = "magenta"
+  runner = "green"
+  watcher = "cyan"
+
+[log]
+  main_only = false
+  time = false
+
+[misc]
+  clean_on_exit = false
+
+[screen]
+  clear_on_rebuild = false
+  keep_scroll = true

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ad15e81
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+tmp/*
+jenn-ai

diff --git a/internal/bedrock/bedrock.go b/internal/bedrock/bedrock.go
index db638cc..450791c 100644
--- a/internal/bedrock/bedrock.go
+++ b/internal/bedrock/bedrock.go
@@ -84,6 +84,26 @@ func (m *AWSModelConfig) constructPayload(message string) ([]byte, error) {
 		TopP:        m.TopP,
 		TopK:        m.TopK,
 	}
+	// TODO: work on v3
+	//body := models.ClaudeMessagesInput{
+	//	AnthropicVersion: "bedrock-2023-05-31",
+	//	Messages: []models.ClaudeMessage{
+	//		{
+	//			Role: "user",
+	//			Content: []models.ClaudeContent{
+	//				{
+	//					Type: "text",
+	//					Text: message,
+	//				},
+	//			},
+	//		},
+	//	},
+	//	MaxTokens:   m.MaxTokens,
+	//	Temperature: m.Temperature,
+	//	TopP:        m.TopP,
+	//	TopK:        m.TopK,
+	//}
+
 	payload, err := json.Marshal(body)
 	if err != nil {
 		return []byte{}, err

diff --git a/internal/bedrock/models/anthropic.go b/internal/bedrock/models/anthropic.go
index c591207..7962d70 100644
--- a/internal/bedrock/models/anthropic.go
+++ b/internal/bedrock/models/anthropic.go
@@ -1,18 +1,60 @@
 // Package models contains structs on model requests/responses
 package models
 
-// ClaudeModelInputs is needed for the request
+// ClaudeModelInputs is needed to marshal the legacy request type
+// Supported Models: claude-instant-v1, claude-v1, claude-v2, claude-v2:1
 type ClaudeModelInputs struct {
-	Prompt            string  `json:"prompt"`
-	MaxTokensToSample int     `json:"max_tokens_to_sample"`
-	Temperature       float64 `json:"temperature,omitempty"`
-	TopP              float64 `json:"top_p,omitempty"`
-	TopK              int     `json:"top_k,omitempty"`
+	Prompt            string  `json:"prompt"`                // The prompt that you want Claude to complete. For proper response generation you need to format your prompt using alternating \n\nHuman: and \n\nAssistant: conversational turns.
+	MaxTokensToSample int     `json:"max_tokens_to_sample"`  // The maximum number of tokens to generate before stopping.
+	Temperature       float64 `json:"temperature,omitempty"` // The amount of randomness injected into the response.
+	TopP              float64 `json:"top_p,omitempty"`       // Use nucleus sampling.
+	TopK              int     `json:"top_k,omitempty"`       // Only sample from the top K options for each subsequent token.
 }
 
-// ClaudeModelOutputs contains the response
+// ClaudeModelOutputs is needed to unmarshal the legacy response
+// Supported Models: claude-instant-v1, claude-v1, claude-v2, claude-v2:1
 type ClaudeModelOutputs struct {
-	Completion string `json:"completion,omitempty"`
-	StopReason string `json:"stop_reason,omitempty"`
-	Stop       string `json:"stop,omitempty"`
+	Completion string `json:"completion,omitempty"`  // The resulting completion up to and excluding the stop sequences.
+	StopReason string `json:"stop_reason,omitempty"` // The reason why the model stopped generating the response.
+	Stop       string `json:"stop,omitempty"`        // The stop sequence that signaled the model to stop generating text.
+}
+
+// ClaudeMessagesInput is needed to marshal the new request type
+// Supported Models: claude-instant-v1.2, claude-v2, claude-v2:1, claude-v3
+type ClaudeMessagesInput struct {
+	AnthropicVersion string          `json:"anthropic_version"`        // The Anthropic version. The value must be bedrock-2023-05-31.
+	Messages         []ClaudeMessage `json:"messages"`                 // The input messages.
+	MaxTokens        int             `json:"max_tokens"`               // The maximum number of tokens to generate before stopping.
+	Temperature      float64         `json:"temperature"`              // The amount of randomness injected into the response.
+	TopK             int             `json:"top_k"`                    // Only sample from the top K options for each subsequent token.
+	TopP             float64         `json:"top_p"`                    // Use nucleus sampling.
+	System           string          `json:"system,omitempty"`         // The system prompt for the request, which provides context, instructions, and guidelines to Claude before presenting it with a question or task.
+	StopSequences    []string        `json:"stop_sequences,omitempty"` // The stop sequences that signal the model to stop generating text.
+}
+
+// ClaudeMessage is a single turn in the conversation
+type ClaudeMessage struct {
+	Role    string          `json:"role"`    // The role of the conversation turn. Valid values are user and assistant.
+	Content []ClaudeContent `json:"content"` // The content of the conversation turn.
+}
+
+// ClaudeContent is a single content block within a conversation turn
+type ClaudeContent struct {
+	Type   string        `json:"type"`             // The type of the content. Valid values are image and text.
+	Text   string        `json:"text,omitempty"`   // The text content.
+	Source *ClaudeSource `json:"source,omitempty"` // The image source. Required when the content type is image.
+}
+
+// ClaudeSource describes an image passed to the model
+type ClaudeSource struct {
+	Type      string `json:"type"`       // The encoding type for the image. You can specify base64.
+	MediaType string `json:"media_type"` // The media type of the image, for example image/jpeg or image/png.
+	Data      string `json:"data"`       // The base64 encoded image bytes for the image. The maximum image size is 3.75MB.
+}
+
+// ClaudeMessagesOutput is needed to unmarshal the new response type
+// Supported Models: claude-instant-v1.2, claude-v2, claude-v2:1, claude-v3
+type ClaudeMessagesOutput struct {
+	Content    []ClaudeContent `json:"content"`     // The content generated by the model.
+	StopReason string          `json:"stop_reason"` // The reason why Anthropic Claude stopped generating the response.
 }

diff --git a/internal/bedrock/models/cohere.go b/internal/bedrock/models/cohere.go
index 949742b..13cf2af 100644
--- a/internal/bedrock/models/cohere.go
+++ b/internal/bedrock/models/cohere.go
@@ -3,24 +3,24 @@ package models
 
 // CommandModelInput contains the request
 type CommandModelInput struct {
-	Prompt            string   `json:"prompt"`
-	Temperature       float64  `json:"temperature"`
-	TopP              float64  `json:"p"`
-	TopK              int      `json:"k"`
-	MaxTokensToSample int      `json:"max_tokens"`
-	StopSequences     []string `json:"stop_sequences"`
-	ReturnLiklihoods  string   `json:"return_likelihoods"`
-	Stream            bool     `json:"stream"`
-	NumGenerations    int      `json:"num_generations"`
+	Prompt            string   `json:"prompt"`             // The input text that serves as the starting point for generating the response.
+	Temperature       float64  `json:"temperature"`        // Use a lower value to decrease randomness in the response.
+	TopP              float64  `json:"p"`                  // Use a lower value to ignore less probable options. Set to 0 or 1.0 to disable.
+	TopK              int      `json:"k"`                  // Specify the number of token choices the model uses to generate the next token.
+	MaxTokensToSample int      `json:"max_tokens"`         // Specify the maximum number of tokens to use in the generated response.
+	StopSequences     []string `json:"stop_sequences"`     // Configure up to four sequences that the model recognizes. After a stop sequence, the model stops generating further tokens.
+	ReturnLiklihoods  string   `json:"return_likelihoods"` // Specify how and if the token likelihoods are returned with the response.
+	Stream            bool     `json:"stream"`             // Specify true to return the response piece-by-piece in real time, or false to return the complete response after the process finishes.
+	NumGenerations    int      `json:"num_generations"`    // The maximum number of generations that the model should return.
 }
 
 // CommandModelOutput contains the response
 type CommandModelOutput struct {
-	Generations []CommandGeneration `json:"generations"`
+	Generations []CommandGeneration `json:"generations"` // A list of generated results along with the likelihoods for the tokens requested.
 }
 
 // CommandGeneration contains the chunks of text
 type CommandGeneration struct {
-	ID   string `json:"id"`
-	Text string `json:"text"`
+	ID   string `json:"id"`   // An identifier for the generation.
+	Text string `json:"text"` // The generated text.
 }
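
Note: as a sanity check for the commented-out v3 path in constructPayload, the sketch below shows the request body that marshaling a models.ClaudeMessagesInput produces. It is a minimal, self-contained example: the struct definitions are trimmed copies of the ones added in internal/bedrock/models/anthropic.go, and the sampling values (512 tokens, temperature 0.7, and so on) are illustrative placeholders, not the AWSModelConfig defaults.

```go
// Sketch of the Claude v3 messages payload the TODO block would build.
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

// Trimmed copies of the structs added in internal/bedrock/models/anthropic.go.
type ClaudeMessagesInput struct {
	AnthropicVersion string          `json:"anthropic_version"` // must be bedrock-2023-05-31
	Messages         []ClaudeMessage `json:"messages"`
	MaxTokens        int             `json:"max_tokens"`
	Temperature      float64         `json:"temperature"`
	TopK             int             `json:"top_k"`
	TopP             float64         `json:"top_p"`
}

type ClaudeMessage struct {
	Role    string          `json:"role"` // user or assistant
	Content []ClaudeContent `json:"content"`
}

type ClaudeContent struct {
	Type string `json:"type"` // text or image
	Text string `json:"text,omitempty"`
}

func main() {
	body := ClaudeMessagesInput{
		AnthropicVersion: "bedrock-2023-05-31",
		Messages: []ClaudeMessage{{
			Role:    "user",
			Content: []ClaudeContent{{Type: "text", Text: "Hello, Claude!"}},
		}},
		MaxTokens:   512, // placeholder; the real code would use m.MaxTokens
		Temperature: 0.7, // placeholder; the real code would use m.Temperature
		TopK:        250, // placeholder
		TopP:        0.9, // placeholder
	}

	payload, err := json.Marshal(body)
	if err != nil {
		log.Fatal(err)
	}
	// The JSON document that would be sent as the InvokeModel request body.
	fmt.Println(string(payload))
}
```

Once the v3 path is enabled, the marshaled bytes would presumably be sent the same way as the v2 payload, with the response unmarshaled into ClaudeMessagesOutput rather than ClaudeModelOutputs.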