Skip to content

Commit 16e094b

Browse files
Stainless Bot stainless-app[bot]
Stainless Bot
authored and committed
feat(api): add gpt-4o-audio-preview model for chat completions (#1135)
This enables audio inputs and outputs. https://platform.openai.com/docs/guides/audio
1 parent 3c32662 commit 16e094b

File tree

9 files changed

+183
-7
lines changed

9 files changed

+183
-7
lines changed

.stats.yml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
configured_endpoints: 68
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-71e58a77027c67e003fdd1b1ac8ac11557d8bfabc7666d1a827c6b1ca8ab98b5.yml
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml

api.md

+4
Original file line number | Diff line number | Diff line change
@@ -33,16 +33,20 @@ Types:
3333

3434
- <code><a href="./src/resources/chat/completions.ts">ChatCompletion</a></code>
3535
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionAssistantMessageParam</a></code>
36+
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionAudio</a></code>
37+
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionAudioParam</a></code>
3638
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionChunk</a></code>
3739
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPart</a></code>
3840
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartImage</a></code>
41+
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartInputAudio</a></code>
3942
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartRefusal</a></code>
4043
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartText</a></code>
4144
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionFunctionCallOption</a></code>
4245
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionFunctionMessageParam</a></code>
4346
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionMessage</a></code>
4447
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionMessageParam</a></code>
4548
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionMessageToolCall</a></code>
49+
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionModality</a></code>
4650
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionNamedToolChoice</a></code>
4751
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionRole</a></code>
4852
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionStreamOptions</a></code>

src/index.ts

+4
Original file line number | Diff line number | Diff line change
@@ -250,16 +250,20 @@ export namespace OpenAI {
250250
export import ChatModel = API.ChatModel;
251251
export import ChatCompletion = API.ChatCompletion;
252252
export import ChatCompletionAssistantMessageParam = API.ChatCompletionAssistantMessageParam;
253+
export import ChatCompletionAudio = API.ChatCompletionAudio;
254+
export import ChatCompletionAudioParam = API.ChatCompletionAudioParam;
253255
export import ChatCompletionChunk = API.ChatCompletionChunk;
254256
export import ChatCompletionContentPart = API.ChatCompletionContentPart;
255257
export import ChatCompletionContentPartImage = API.ChatCompletionContentPartImage;
258+
export import ChatCompletionContentPartInputAudio = API.ChatCompletionContentPartInputAudio;
256259
export import ChatCompletionContentPartRefusal = API.ChatCompletionContentPartRefusal;
257260
export import ChatCompletionContentPartText = API.ChatCompletionContentPartText;
258261
export import ChatCompletionFunctionCallOption = API.ChatCompletionFunctionCallOption;
259262
export import ChatCompletionFunctionMessageParam = API.ChatCompletionFunctionMessageParam;
260263
export import ChatCompletionMessage = API.ChatCompletionMessage;
261264
export import ChatCompletionMessageParam = API.ChatCompletionMessageParam;
262265
export import ChatCompletionMessageToolCall = API.ChatCompletionMessageToolCall;
266+
export import ChatCompletionModality = API.ChatCompletionModality;
263267
export import ChatCompletionNamedToolChoice = API.ChatCompletionNamedToolChoice;
264268
export import ChatCompletionRole = API.ChatCompletionRole;
265269
export import ChatCompletionStreamOptions = API.ChatCompletionStreamOptions;

src/lib/AbstractChatCompletionRunner.ts

+3 -1
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,9 @@ export class AbstractChatCompletionRunner<
105105
const message = this.messages[i];
106106
if (isAssistantMessage(message)) {
107107
const { function_call, ...rest } = message;
108-
const ret: ChatCompletionMessage = {
108+
109+
// TODO: support audio here
110+
const ret: Omit<ChatCompletionMessage, 'audio'> = {
109111
...rest,
110112
content: (message as ChatCompletionMessage).content ?? null,
111113
refusal: (message as ChatCompletionMessage).refusal ?? null,

src/resources/beta/assistants.ts

+10
Original file line number | Diff line number | Diff line change
@@ -298,6 +298,11 @@ export namespace AssistantStreamEvent {
298298
data: ThreadsAPI.Thread;
299299

300300
event: 'thread.created';
301+
302+
/**
303+
* Whether to enable input audio transcription.
304+
*/
305+
enabled?: boolean;
301306
}
302307

303308
/**
@@ -1084,6 +1089,11 @@ export interface ThreadStreamEvent {
10841089
data: ThreadsAPI.Thread;
10851090

10861091
event: 'thread.created';
1092+
1093+
/**
1094+
* Whether to enable input audio transcription.
1095+
*/
1096+
enabled?: boolean;
10871097
}
10881098

10891099
export interface AssistantCreateParams {

src/resources/chat/chat.ts

+7
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@ export type ChatModel =
1616
| 'gpt-4o'
1717
| 'gpt-4o-2024-08-06'
1818
| 'gpt-4o-2024-05-13'
19+
| 'gpt-4o-realtime-preview'
1920
| 'gpt-4o-realtime-preview-2024-10-01'
21+
| 'gpt-4o-audio-preview'
22+
| 'gpt-4o-audio-preview-2024-10-01'
2023
| 'chatgpt-4o-latest'
2124
| 'gpt-4o-mini'
2225
| 'gpt-4o-mini-2024-07-18'
@@ -45,16 +48,20 @@ export namespace Chat {
4548
export import Completions = CompletionsAPI.Completions;
4649
export import ChatCompletion = CompletionsAPI.ChatCompletion;
4750
export import ChatCompletionAssistantMessageParam = CompletionsAPI.ChatCompletionAssistantMessageParam;
51+
export import ChatCompletionAudio = CompletionsAPI.ChatCompletionAudio;
52+
export import ChatCompletionAudioParam = CompletionsAPI.ChatCompletionAudioParam;
4853
export import ChatCompletionChunk = CompletionsAPI.ChatCompletionChunk;
4954
export import ChatCompletionContentPart = CompletionsAPI.ChatCompletionContentPart;
5055
export import ChatCompletionContentPartImage = CompletionsAPI.ChatCompletionContentPartImage;
56+
export import ChatCompletionContentPartInputAudio = CompletionsAPI.ChatCompletionContentPartInputAudio;
5157
export import ChatCompletionContentPartRefusal = CompletionsAPI.ChatCompletionContentPartRefusal;
5258
export import ChatCompletionContentPartText = CompletionsAPI.ChatCompletionContentPartText;
5359
export import ChatCompletionFunctionCallOption = CompletionsAPI.ChatCompletionFunctionCallOption;
5460
export import ChatCompletionFunctionMessageParam = CompletionsAPI.ChatCompletionFunctionMessageParam;
5561
export import ChatCompletionMessage = CompletionsAPI.ChatCompletionMessage;
5662
export import ChatCompletionMessageParam = CompletionsAPI.ChatCompletionMessageParam;
5763
export import ChatCompletionMessageToolCall = CompletionsAPI.ChatCompletionMessageToolCall;
64+
export import ChatCompletionModality = CompletionsAPI.ChatCompletionModality;
5865
export import ChatCompletionNamedToolChoice = CompletionsAPI.ChatCompletionNamedToolChoice;
5966
export import ChatCompletionRole = CompletionsAPI.ChatCompletionRole;
6067
export import ChatCompletionStreamOptions = CompletionsAPI.ChatCompletionStreamOptions;

src/resources/chat/completions.ts

+148 -5
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,10 @@ import { Stream } from '../../streaming';
1111

1212
export class Completions extends APIResource {
1313
/**
14-
* Creates a model response for the given chat conversation.
14+
* Creates a model response for the given chat conversation. Learn more in the
15+
* [text generation](https://platform.openai.com/docs/guides/text-generation),
16+
* [vision](https://platform.openai.com/docs/guides/vision), and
17+
* [audio](https://platform.openai.com/docs/guides/audio) guides.
1518
*/
1619
create(
1720
body: ChatCompletionCreateParamsNonStreaming,
@@ -138,6 +141,12 @@ export interface ChatCompletionAssistantMessageParam {
138141
*/
139142
role: 'assistant';
140143

144+
/**
145+
* Data about a previous audio response from the model.
146+
* [Learn more](https://platform.openai.com/docs/guides/audio).
147+
*/
148+
audio?: ChatCompletionAssistantMessageParam.Audio | null;
149+
141150
/**
142151
* The contents of the assistant message. Required unless `tool_calls` or
143152
* `function_call` is specified.
@@ -168,6 +177,17 @@ export interface ChatCompletionAssistantMessageParam {
168177
}
169178

170179
export namespace ChatCompletionAssistantMessageParam {
180+
/**
181+
* Data about a previous audio response from the model.
182+
* [Learn more](https://platform.openai.com/docs/guides/audio).
183+
*/
184+
export interface Audio {
185+
/**
186+
* Unique identifier for a previous audio response from the model.
187+
*/
188+
id: string;
189+
}
190+
171191
/**
172192
* @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of
173193
* a function that should be called, as generated by the model.
@@ -188,6 +208,54 @@ export namespace ChatCompletionAssistantMessageParam {
188208
}
189209
}
190210

211+
/**
212+
* If the audio output modality is requested, this object contains data about the
213+
* audio response from the model.
214+
* [Learn more](https://platform.openai.com/docs/guides/audio).
215+
*/
216+
export interface ChatCompletionAudio {
217+
/**
218+
* Unique identifier for this audio response.
219+
*/
220+
id: string;
221+
222+
/**
223+
* Base64 encoded audio bytes generated by the model, in the format specified in
224+
* the request.
225+
*/
226+
data: string;
227+
228+
/**
229+
* The Unix timestamp (in seconds) for when this audio response will no longer be
230+
* accessible on the server for use in multi-turn conversations.
231+
*/
232+
expires_at: number;
233+
234+
/**
235+
* Transcript of the audio generated by the model.
236+
*/
237+
transcript: string;
238+
}
239+
240+
/**
241+
* Parameters for audio output. Required when audio output is requested with
242+
* `modalities: ["audio"]`.
243+
* [Learn more](https://platform.openai.com/docs/guides/audio).
244+
*/
245+
export interface ChatCompletionAudioParam {
246+
/**
247+
* Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
248+
* or `pcm16`.
249+
*/
250+
format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';
251+
252+
/**
253+
* Specifies the voice type. Supported voices are `alloy`, `echo`, `fable`, `onyx`,
254+
* `nova`, and `shimmer`.
255+
*/
256+
voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
257+
}
258+
191259
/**
192260
* Represents a streamed chunk of a chat completion response returned by model,
193261
* based on the provided input.
@@ -371,8 +439,18 @@ export namespace ChatCompletionChunk {
371439
}
372440
}
373441

374-
export type ChatCompletionContentPart = ChatCompletionContentPartText | ChatCompletionContentPartImage;
442+
/**
443+
* Learn about
444+
* [text inputs](https://platform.openai.com/docs/guides/text-generation).
445+
*/
446+
export type ChatCompletionContentPart =
447+
| ChatCompletionContentPartText
448+
| ChatCompletionContentPartImage
449+
| ChatCompletionContentPartInputAudio;
375450

451+
/**
452+
* Learn about [image inputs](https://platform.openai.com/docs/guides/vision).
453+
*/
376454
export interface ChatCompletionContentPartImage {
377455
image_url: ChatCompletionContentPartImage.ImageURL;
378456

@@ -397,6 +475,32 @@ export namespace ChatCompletionContentPartImage {
397475
}
398476
}
399477

478+
/**
479+
* Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).
480+
*/
481+
export interface ChatCompletionContentPartInputAudio {
482+
input_audio: ChatCompletionContentPartInputAudio.InputAudio;
483+
484+
/**
485+
* The type of the content part. Always `input_audio`.
486+
*/
487+
type: 'input_audio';
488+
}
489+
490+
export namespace ChatCompletionContentPartInputAudio {
491+
export interface InputAudio {
492+
/**
493+
* Base64 encoded audio data.
494+
*/
495+
data: string;
496+
497+
/**
498+
* The format of the encoded audio data. Currently supports "wav" and "mp3".
499+
*/
500+
format: 'wav' | 'mp3';
501+
}
502+
}
503+
400504
export interface ChatCompletionContentPartRefusal {
401505
/**
402506
* The refusal message generated by the model.
@@ -409,6 +513,10 @@ export interface ChatCompletionContentPartRefusal {
409513
type: 'refusal';
410514
}
411515

516+
/**
517+
* Learn about
518+
* [text inputs](https://platform.openai.com/docs/guides/text-generation).
519+
*/
412520
export interface ChatCompletionContentPartText {
413521
/**
414522
* The text content.
@@ -471,6 +579,13 @@ export interface ChatCompletionMessage {
471579
*/
472580
role: 'assistant';
473581

582+
/**
583+
* If the audio output modality is requested, this object contains data about the
584+
* audio response from the model.
585+
* [Learn more](https://platform.openai.com/docs/guides/audio).
586+
*/
587+
audio?: ChatCompletionAudio | null;
588+
474589
/**
475590
* @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of
476591
* a function that should be called, as generated by the model.
@@ -548,6 +663,8 @@ export namespace ChatCompletionMessageToolCall {
548663
}
549664
}
550665

666+
export type ChatCompletionModality = 'text' | 'audio';
667+
551668
/**
552669
* Specifies a tool the model should use. Use to force the model to call a specific
553670
* function.
@@ -743,6 +860,13 @@ export interface ChatCompletionCreateParamsBase {
743860
*/
744861
model: (string & {}) | ChatAPI.ChatModel;
745862

863+
/**
864+
* Parameters for audio output. Required when audio output is requested with
865+
* `modalities: ["audio"]`.
866+
* [Learn more](https://platform.openai.com/docs/guides/audio).
867+
*/
868+
audio?: ChatCompletionAudioParam | null;
869+
746870
/**
747871
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their
748872
* existing frequency in the text so far, decreasing the model's likelihood to
@@ -812,10 +936,24 @@ export interface ChatCompletionCreateParamsBase {
812936

813937
/**
814938
* Developer-defined tags and values used for filtering completions in the
815-
* [dashboard](https://platform.openai.com/completions).
939+
* [dashboard](https://platform.openai.com/chat-completions).
816940
*/
817941
metadata?: Record<string, string> | null;
818942

943+
/**
944+
* Output types that you would like the model to generate for this request. Most
945+
* models are capable of generating text, which is the default:
946+
*
947+
* `["text"]`
948+
*
949+
* The `gpt-4o-audio-preview` model can also be used to
950+
* [generate audio](https://platform.openai.com/docs/guides/audio). To request that
951+
* this model generate both text and audio responses, you can use:
952+
*
953+
* `["text", "audio"]`
954+
*/
955+
modalities?: Array<ChatCompletionModality> | null;
956+
819957
/**
820958
* How many chat completion choices to generate for each input message. Note that
821959
* you will be charged based on the number of generated tokens across all of the
@@ -900,8 +1038,9 @@ export interface ChatCompletionCreateParamsBase {
9001038
stop?: string | null | Array<string>;
9011039

9021040
/**
903-
* Whether or not to store the output of this completion request for traffic
904-
* logging in the [dashboard](https://platform.openai.com/completions).
1041+
* Whether or not to store the output of this chat completion request for use in
1042+
* our [model distillation](https://platform.openai.com/docs/guides/distillation)
1043+
* or [evals](https://platform.openai.com/docs/guides/evals) products.
9051044
*/
9061045
store?: boolean | null;
9071046

@@ -1049,16 +1188,20 @@ export type CompletionCreateParamsStreaming = ChatCompletionCreateParamsStreamin
10491188
export namespace Completions {
10501189
export import ChatCompletion = ChatCompletionsAPI.ChatCompletion;
10511190
export import ChatCompletionAssistantMessageParam = ChatCompletionsAPI.ChatCompletionAssistantMessageParam;
1191+
export import ChatCompletionAudio = ChatCompletionsAPI.ChatCompletionAudio;
1192+
export import ChatCompletionAudioParam = ChatCompletionsAPI.ChatCompletionAudioParam;
10521193
export import ChatCompletionChunk = ChatCompletionsAPI.ChatCompletionChunk;
10531194
export import ChatCompletionContentPart = ChatCompletionsAPI.ChatCompletionContentPart;
10541195
export import ChatCompletionContentPartImage = ChatCompletionsAPI.ChatCompletionContentPartImage;
1196+
export import ChatCompletionContentPartInputAudio = ChatCompletionsAPI.ChatCompletionContentPartInputAudio;
10551197
export import ChatCompletionContentPartRefusal = ChatCompletionsAPI.ChatCompletionContentPartRefusal;
10561198
export import ChatCompletionContentPartText = ChatCompletionsAPI.ChatCompletionContentPartText;
10571199
export import ChatCompletionFunctionCallOption = ChatCompletionsAPI.ChatCompletionFunctionCallOption;
10581200
export import ChatCompletionFunctionMessageParam = ChatCompletionsAPI.ChatCompletionFunctionMessageParam;
10591201
export import ChatCompletionMessage = ChatCompletionsAPI.ChatCompletionMessage;
10601202
export import ChatCompletionMessageParam = ChatCompletionsAPI.ChatCompletionMessageParam;
10611203
export import ChatCompletionMessageToolCall = ChatCompletionsAPI.ChatCompletionMessageToolCall;
1204+
export import ChatCompletionModality = ChatCompletionsAPI.ChatCompletionModality;
10621205
export import ChatCompletionNamedToolChoice = ChatCompletionsAPI.ChatCompletionNamedToolChoice;
10631206
export import ChatCompletionRole = ChatCompletionsAPI.ChatCompletionRole;
10641207
export import ChatCompletionStreamOptions = ChatCompletionsAPI.ChatCompletionStreamOptions;

src/resources/chat/index.ts

+4
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,20 @@
33
export {
44
ChatCompletion,
55
ChatCompletionAssistantMessageParam,
6+
ChatCompletionAudio,
7+
ChatCompletionAudioParam,
68
ChatCompletionChunk,
79
ChatCompletionContentPart,
810
ChatCompletionContentPartImage,
11+
ChatCompletionContentPartInputAudio,
912
ChatCompletionContentPartRefusal,
1013
ChatCompletionContentPartText,
1114
ChatCompletionFunctionCallOption,
1215
ChatCompletionFunctionMessageParam,
1316
ChatCompletionMessage,
1417
ChatCompletionMessageParam,
1518
ChatCompletionMessageToolCall,
19+
ChatCompletionModality,
1620
ChatCompletionNamedToolChoice,
1721
ChatCompletionRole,
1822
ChatCompletionStreamOptions,

0 commit comments

Comments
 (0)