From 3d37da63a5028dc1329bd3dd87603e36bbae9d13 Mon Sep 17 00:00:00 2001 From: Tanner Kvarfordt Date: Sat, 11 Nov 2023 18:29:48 -0700 Subject: [PATCH] API Updates (#10) * Specified Language field in audio transcription test and example * Added speech endpoint support to audio package * Updated audio example * Added missing fields to chat request * Updated common.makeRequest to handle error responses when the requested response type is a byte slice * Added missing fields to legacy completions endpoint * Fixed deprecated comment * Deprecated edits package * Added EncodingFormat to embeddings request * Added files.MakeRetrieveContentRequestNoDisk * Deprecated finetunes package * Updated README to reflect deprecations * Updated models package --- .gitignore | 5 +- README.md | 5 +- audio/audio.go | 48 +++++++++++- audio/audio_test.go | 40 +++++++++- chat/chat.go | 132 ++++++++++++++++++++++---------- common/common.go | 27 ++++++- completions/completions.go | 11 ++- edits/edits.go | 2 + embeddings/embeddings.go | 3 + examples/audio/audio-example.go | 27 ++++++- files/files.go | 42 +++++++--- finetunes/finetunes.go | 2 + models/models.go | 24 ++++-- 13 files changed, 295 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index a286a66..26e3bbd 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,7 @@ examples/files/files examples/finetunes/finetunes examples/images/images examples/models/models -examples/moderations/moderations \ No newline at end of file +examples/moderations/moderations + +# Example outputs +examples/audio/speech-creation.mp3 \ No newline at end of file diff --git a/README.md b/README.md index 266c8aa..65821a0 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,11 @@ The links below lead to examples of how to use each library package. 
- [x] [Audio](./audio/README.md) - [x] [Chat](./chat/README.md) - [x] [Completions](./completions/README.md) -- [x] [Edits](./edits/README.md) +- [x] ~~[Edits](./edits/README.md)~~ (Deprecated) - [x] [Embeddings](./embeddings/README.md) +- [ ] Fine-Tuning - [x] [Files](./files/README.md) -- [x] [Fine-Tunes](./finetunes/README.md) (Additional Testing Needed) +- [x] ~~[Fine-Tunes](./finetunes/README.md)~~ (Deprecated) - [x] [Images](./images/README.md) - [x] [Models](./models/README.md) - [x] [Moderations](./moderations/README.md) diff --git a/audio/audio.go b/audio/audio.go index 850c528..2d0e860 100644 --- a/audio/audio.go +++ b/audio/audio.go @@ -20,13 +20,16 @@ const ( BaseEndpoint = common.BaseURL + "audio/" TransciptionEndpoint = BaseEndpoint + "transcriptions" TranslationEndpoint = BaseEndpoint + "translations" + SpeechEndpoint = BaseEndpoint + "speech" ) type ResponseFormat = string const ( // TODO: Support non-json return formats. - JSONResponseFormat = "json" + ResponseFormatJSON = "json" + // Deprecated: Use ResponseFormatJSON instead. + JSONResponseFormat = ResponseFormatJSON //TextResponseFormat = "text" //SRTResponseFormat = "srt" //VerboseJSONResponseFormat = "verbose_json" @@ -152,3 +155,46 @@ func MakeTranslationRequest(request *TranslationRequest, organizationID *string) } return r, nil } + +const ( + VoiceAlloy = "alloy" + VoiceEcho = "echo" + VoiceFable = "fable" + VoiceOnyx = "onyx" + VoiceNova = "nova" + VoiceShimmer = "shimmer" + + SpeechFormatMp3 = "mp3" + SpeechFormatOpus = "opus" + SpeechFormatAac = "aac" + SpeechFormatFlac = "flac" +) + +// Request structure for the create speech endpoint. +type SpeechRequest struct { + // One of the available TTS models. + Model string `json:"model"` + + // The text to generate audio for. The maximum length is 4096 characters. + Input string `json:"input"` + + // The voice to use when generating the audio. + Voice string `json:"voice"` + + // The format to return the audio in. 
+ ResponseFormat ResponseFormat `json:"response_format,omitempty"` + + // The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. + Speed float64 `json:"speed,omitempty"` +} + +func MakeSpeechRequest(request *SpeechRequest, organizationID *string) ([]byte, error) { + r, err := common.MakeRequest[SpeechRequest, []byte](request, SpeechEndpoint, http.MethodPost, organizationID) + if err != nil { + return nil, err + } + if r == nil { + return nil, errors.New("nil response received") + } + return *r, nil +} diff --git a/audio/audio_test.go b/audio/audio_test.go index 3079b79..83a6db9 100644 --- a/audio/audio_test.go +++ b/audio/audio_test.go @@ -1,11 +1,13 @@ package audio_test import ( + "errors" "os" "testing" "github.com/TannerKvarfordt/gopenai/audio" "github.com/TannerKvarfordt/gopenai/authentication" + "github.com/TannerKvarfordt/gopenai/common" ) const ( @@ -22,8 +24,9 @@ func init() { func TestTranscription(t *testing.T) { resp, err := audio.MakeTranscriptionRequest(&audio.TranscriptionRequest{ - File: transcriptionFilePath, - Model: model, + File: transcriptionFilePath, + Model: model, + Language: "en", }, nil) if err != nil { t.Fatal(err) @@ -49,3 +52,36 @@ func TestTranslation(t *testing.T) { return } } + +func TestSpeech(t *testing.T) { + resp, err := audio.MakeSpeechRequest(&audio.SpeechRequest{ + Model: "tts-1", + Input: "The quick brown fox jumps over the lazy dog.", + Voice: audio.VoiceAlloy, + ResponseFormat: audio.SpeechFormatMp3, + }, nil) + if err != nil { + t.Fatal(err) + return + } + if len(resp) == 0 { + t.Fatal("No audio returned") + return + } +} + +func TestInvalidSpeechRequest(t *testing.T) { + _, err := audio.MakeSpeechRequest(&audio.SpeechRequest{ + Model: "", + Input: "The quick brown fox jumps over the lazy dog.", + ResponseFormat: audio.SpeechFormatMp3, + }, nil) + if err == nil { + t.Fatal("Expected to receive an invalid request error") + return + } + respErr := new(common.ResponseError) + if !errors.As(err, 
&respErr) { + t.Fatal("Expected error to be of type common.ResponseError") + } +} diff --git a/chat/chat.go b/chat/chat.go index 2fe462a..db36326 100644 --- a/chat/chat.go +++ b/chat/chat.go @@ -24,23 +24,99 @@ const ( AssistantRole Role = "assistant" ) +type FunctionCall struct { + Arguments string `json:"arguments"` + Name string `json:"name"` +} + +type ToolCall struct { + ID string `json:"id"` + Type string `json:"type"` + Function FunctionCall `json:"function"` +} + type Chat struct { - Role Role `json:"role"` - Content string `json:"content"` + Content string `json:"content"` + ToolCalls []ToolCall `json:"tool_calls,omitempty"` + Role Role `json:"role"` + + // Deprecated: Use ToolCalls instead + FunctionCall []FunctionCall `json:"function_call,omitempty"` +} + +type ResponseFormat struct { + // Must be one of text or json_object. + Type string `json:"type,omitempty"` } // Request structure for the chat API endpoint. type Request struct { + // The messages to generate chat completions for, + // in the [chat format]. + // + // [chat format]: https://platform.openai.com/docs/guides/chat + Messages []Chat `json:"messages"` + // ID of the model to use. You can use the List models API // to see all of your available models, or see our Model // overview for descriptions of them. Model string `json:"model"` - // The messages to generate chat completions for, - // in the [chat format]. + // Number between -2.0 and 2.0. Positive values penalize new + // tokens based on their existing frequency in the text so far, + // decreasing the model's likelihood to repeat the same line verbatim. + FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` + + // Modify the likelihood of specified tokens appearing in the completion. + // Accepts a json object that maps tokens (specified by their token ID in + // the tokenizer) to an associated bias value from -100 to 100. Mathematically, + // the bias is added to the logits generated by the model prior to sampling. 
+ // The exact effect will vary per model, but values between -1 and 1 should decrease + // or increase likelihood of selection; values like -100 or 100 should result in a + // ban or exclusive selection of the relevant token. + LogitBias map[string]int64 `json:"logit_bias,omitempty"` + + // The maximum number of tokens to generate in the chat completion. + // The total length of input tokens and generated tokens is limited + // by the model's context length. + MaxTokens *int64 `json:"max_tokens,omitempty"` + + // How many chat completion choices to generate for each input message. + N *int64 `json:"n,omitempty"` + + // Number between -2.0 and 2.0. Positive values penalize new tokens + // based on whether they appear in the text so far, increasing + // the model's likelihood to talk about new topics. + PresencePenalty *float64 `json:"presence_penalty,omitempty"` + + // An object specifying the format that the model must output. + // Setting to "json_object" enables JSON mode, which guarantees + // the message the model generates is valid JSON. // - // [chat format]: https://platform.openai.com/docs/guides/chat - Messages []Chat `json:"messages"` + // When using JSON mode, you must also instruct the model to produce + // JSON yourself via a system or user message. Without this, the model + // may generate an unending stream of whitespace until the generation + // reaches the token limit, resulting in a long-running and seemingly + // "stuck" request. Also note that the message content may be partially + // cut off if finish_reason="length", which indicates the generation + // exceeded max_tokens or the conversation exceeded the max context length. + ResponseFormat *ResponseFormat `json:"response_format,omitempty"` + + // This feature is in Beta. If specified, our system will make a best effort + // to sample deterministically, such that repeated requests with the same + // seed and parameters should return the same result. 
Determinism is not + // guaranteed, and you should refer to the system_fingerprint response + // parameter to monitor changes in the backend. + Seed int64 `json:"seed,omitempty"` + + // Up to 4 sequences where the API will stop generating further tokens. + Stop []string `json:"stop,omitempty"` + + // If set, partial message deltas will be sent, like in ChatGPT. Tokens + // will be sent as data-only server-sent events as they become available, + // with the stream terminated by a data: [DONE] message. See the OpenAI + // Cookbook for example code. + // Stream bool `json:"stream,omitempty"` TODO: Add streaming support // What sampling temperature to use, between 0 and 2. Higher values // like 0.8 will make the output more random, while lower values like @@ -55,36 +131,10 @@ type Request struct { // We generally recommend altering this or temperature but not both. TopP *float64 `json:"top_p,omitempty"` - // How many chat completion choices to generate for each input message. - N *int64 `json:"n,omitempty"` - - // If set, partial message deltas will be sent, like in ChatGPT. Tokens - // will be sent as data-only server-sent events as they become available, - // with the stream terminated by a data: [DONE] message. See the OpenAI - // Cookbook for example code. - // Stream bool `json:"stream,omitempty"` TODO: Add streaming support - - // Up to 4 sequences where the API will stop generating further tokens. - Stop []string `json:"stop,omitempty"` - - // The maximum number of tokens to generate in the chat completion. - // The total length of input tokens and generated tokens is limited - // by the model's context length. - MaxTokens *int64 `json:"max_tokens,omitempty"` - - // Number between -2.0 and 2.0. Positive values penalize new tokens - // based on their existing frequency in the text so far, decreasing - // the model's likelihood to repeat the same line verbatim. 
- PresencePenalty *float64 `json:"presence_penalty,omitempty"` - - // Modify the likelihood of specified tokens appearing in the completion. - // Accepts a json object that maps tokens (specified by their token ID in - // the tokenizer) to an associated bias value from -100 to 100. Mathematically, - // the bias is added to the logits generated by the model prior to sampling. - // The exact effect will vary per model, but values between -1 and 1 should decrease - // or increase likelihood of selection; values like -100 or 100 should result in a - // ban or exclusive selection of the relevant token. - LogitBias map[string]int64 `json:"logit_bias,omitempty"` + // TODO: Support tools + // Tools []Tool `json:"tools,omitempty"` + // TODO: Support ToolChoice + // ToolChoice ToolChoice `json:"tool_choice,omitempty"` // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. User string `json:"user,omitempty"` @@ -92,15 +142,17 @@ type Request struct { type Response struct { ID string `json:"id,omitempty"` - Object string `json:"object,omitempty"` - Created int64 `json:"created,omitempty"` Choices []struct { Index int64 `json:"index,omitempty"` Message Chat `json:"message,omitempty"` FinishReason string `json:"finish_reason,omitempty"` } - Usage common.ResponseUsage `json:"usage"` - Error *common.ResponseError `json:"error,omitempty"` + Created int64 `json:"created,omitempty"` + Model string `json:"model,omitempty"` + SystemFingerprint string `json:"system_fingerprint,omitempty"` + Object string `json:"object,omitempty"` + Usage common.ResponseUsage `json:"usage"` + Error *common.ResponseError `json:"error,omitempty"` } func MakeRequest(request *Request, organizationID *string) (*Response, error) { diff --git a/common/common.go b/common/common.go index 4b50a05..f4e2bfa 100644 --- a/common/common.go +++ b/common/common.go @@ -11,6 +11,7 @@ import ( "net/http" "net/url" "os" + "reflect" "strings" auth 
"github.com/TannerKvarfordt/gopenai/authentication" @@ -24,6 +25,10 @@ const ( BaseURL = "https://api.openai.com/" + APIVersion + "/" ) +type responseErrorWrapper struct { + Error *ResponseError `json:"error,omitempty"` +} + // A common error structure included in OpenAI API response bodies. type ResponseError struct { // The error message. @@ -117,13 +122,29 @@ func makeRequest[ResponseT any](req *http.Request) (*ResponseT, error) { return nil, errors.New("unable to parse response body") } - response := new(ResponseT) - err = json.Unmarshal(respBody, response) + var response ResponseT + if _, ok := any(response).([]byte); ok { + // Special case for handling binary return types. + // Defer to the caller to do what they will with + // the response. + v := reflect.ValueOf(&response).Elem() + v.Set(reflect.MakeSlice(v.Type(), len(respBody), cap(respBody))) + v.SetBytes(respBody) + + respErr := responseErrorWrapper{} + json.Unmarshal(respBody, &respErr) + if respErr.Error != nil { + return &response, respErr.Error + } + return &response, nil + } + + err = json.Unmarshal(respBody, &response) if err != nil { return nil, err } - return response, nil + return &response, nil } func IsUrl(str string) bool { diff --git a/completions/completions.go b/completions/completions.go index 452a6d7..3a286ce 100644 --- a/completions/completions.go +++ b/completions/completions.go @@ -107,6 +107,12 @@ type Request struct { // A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. User string `json:"user,omitempty"` + + // If specified, our system will make a best effort to sample deterministically, such that repeated + // requests with the same seed and parameters should return the same result. Determinism is not + // guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes + // in the backend. + Seed *int64 `json:"seed,omitempty"` } // Response structure for the completions API endpoint. 
@@ -126,8 +132,9 @@ type Response struct { TextOffset []uint64 `json:"text_offset"` } `json:"logprobs"` } `json:"choices"` - Usage common.ResponseUsage `json:"usage"` - Error *common.ResponseError `json:"error,omitempty"` + SystemFingerprint string `json:"system_fingerprint"` + Usage common.ResponseUsage `json:"usage"` + Error *common.ResponseError `json:"error,omitempty"` } // Make a completions request. diff --git a/edits/edits.go b/edits/edits.go index 1a3efbe..38302dd 100644 --- a/edits/edits.go +++ b/edits/edits.go @@ -1,3 +1,5 @@ +// Deprecated: Replaced by GPT-3.5 Turbo. See https://community.openai.com/t/openai-deprecation-summary/289539 +// // Package edits provides bindings for the [edits] [endpoint]. // Given a prompt and an instruction, the model will return // an edited version of the prompt. diff --git a/embeddings/embeddings.go b/embeddings/embeddings.go index 929b94d..8e8718b 100644 --- a/embeddings/embeddings.go +++ b/embeddings/embeddings.go @@ -31,6 +31,9 @@ type Request struct { // A unique identifier representing your end-user, which can help OpenAI to // monitor and detect abuse. User string `json:"user"` + + // The format to return the embeddings in. Can be either float or base64. + EncodingFormat string `json:"encoding_format,omitempty"` } // Response structure for the embeddings API endpoint. 
diff --git a/examples/audio/audio-example.go b/examples/audio/audio-example.go index 9a87df4..10077ca 100644 --- a/examples/audio/audio-example.go +++ b/examples/audio/audio-example.go @@ -25,8 +25,9 @@ func main() { { // Transcription fmt.Printf("Sending transcription request for file %s...\n", transcriptionFile) r, err := audio.MakeTranscriptionRequest(&audio.TranscriptionRequest{ - File: transcriptionFile, - Model: model, + File: transcriptionFile, + Model: model, + Language: "en", }, nil) if err != nil { fmt.Printf("Error with transcription request: %s\n", err) @@ -47,4 +48,26 @@ func main() { fmt.Printf("Translated audio: %s\n", r.Text) } } + + { // Speech + const s string = "The quick brown fox jumps over the lazy dog." + fmt.Printf("Sending speech creation request for \"%s\"\n", s) + resp, err := audio.MakeSpeechRequest(&audio.SpeechRequest{ + Model: "tts-1", + Input: s, + Voice: audio.VoiceNova, + ResponseFormat: audio.SpeechFormatMp3, + }, nil) + if err != nil { + fmt.Printf("Error with speech creation request: %s\n", err) + } + if len(resp) == 0 { + fmt.Println("No TTS audio returned. 
:(") + } else { + err = os.WriteFile(fmt.Sprintf("speech-creation.%s", audio.SpeechFormatMp3), resp, 0644) + if err != nil { + fmt.Printf("Error writing %s to disk: %s\n", audio.SpeechFormatMp3, err) + } + } + } } diff --git a/files/files.go b/files/files.go index ddd3291..3571b05 100644 --- a/files/files.go +++ b/files/files.go @@ -156,34 +156,52 @@ func MakeRetrieveContentRequest(fileID, filepath string, overwrite bool, organiz return os.ErrExist } - req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/%s/content", Endpoint, fileID), nil) + respBody, err := MakeRetrieveContentRequestNoDisk(fileID, organizationID) + if err != nil { + return err + } + + fout, err := os.Create(filepath) if err != nil { return err } + defer fout.Close() + + _, err = io.Copy(fout, bytes.NewBuffer(respBody)) + if err != nil { + return err + } + + return nil +} + +// Retrieves "fileID" from OpenAI and returns the bytes of the file. +func MakeRetrieveContentRequestNoDisk(fileID string, organizationID *string) ([]byte, error) { + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/%s/content", Endpoint, fileID), nil) + if err != nil { + return nil, err + } if req == nil { - return errors.New("nil request created") + return nil, errors.New("nil request created") } common.SetRequestHeaders(req, "application/json", organizationID) resp, err := http.DefaultClient.Do(req) if err != nil { - return err + return nil, err } if resp == nil { - return errors.New("nil response received") + return nil, errors.New("nil response received") } defer resp.Body.Close() - fout, err := os.Create(filepath) + respBody, err := io.ReadAll(resp.Body) if err != nil { - return err + return nil, err } - defer fout.Close() - - _, err = io.Copy(fout, resp.Body) - if err != nil { - return err + if respBody == nil { + return nil, errors.New("unable to parse response body") } - return nil + return respBody, nil } diff --git a/finetunes/finetunes.go b/finetunes/finetunes.go index f9f6882..f4515ff 100644 
--- a/finetunes/finetunes.go +++ b/finetunes/finetunes.go @@ -1,3 +1,5 @@ +// Deprecated: See https://platform.openai.com/docs/deprecations/2023-08-22-fine-tunes-endpoint +// // Package finetunes provides bindings for the [finetunes] [endpoint]. // Manage fine-tuning jobs to tailor a model to your specific training data. // Related guide: [Fine-tune models]. diff --git a/models/models.go b/models/models.go index a5d736f..e62fa84 100644 --- a/models/models.go +++ b/models/models.go @@ -20,13 +20,20 @@ const Endpoint = common.BaseURL + "models" // Response structure for a Retrieve Model request. type ModelResponse struct { - ID string `json:"id"` - Created uint64 `json:"created"` - OwnedBy string `json:"owned_by"` - Root string `json:"root"` - Parent *string `json:"parent"` - Error *common.ResponseError `json:"error,omitempty"` + ID string `json:"id"` + Created uint64 `json:"created"` + Object string `json:"object"` + OwnedBy string `json:"owned_by"` + // Deprecated: No longer listed in the API docs. + Root string `json:"root"` + // Deprecated: No longer listed in the API docs. + Parent *string `json:"parent"` + + Error *common.ResponseError `json:"error,omitempty"` + + // Deprecated: No longer listed in the API docs. + // // The values of each permission object (aka, map) // in this list are non-homogeneous. Generally, // they are strings, integers, or booleans, but @@ -36,8 +43,9 @@ type ModelResponse struct { // Response structure for a List Models request. type ListModelsResponse struct { - Data []ModelResponse `json:"data"` - Error *common.ResponseError `json:"error,omitempty"` + Object string `json:"object"` + Data []ModelResponse `json:"data"` + Error *common.ResponseError `json:"error,omitempty"` } // Lists the currently available models, and provides basic information about each one such as the owner and availability.