Skip to content

Commit 608200f

Browse files
feat(api): add o3-mini (#1295)
fix(types): correct metadata type + other fixes
1 parent a0519f5 commit 608200f

21 files changed

+320
-144
lines changed

.stats.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
configured_endpoints: 69
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-3904ef6b29a89c98f93a9b7da19879695f3c440564be6384db7af1b734611ede.yml
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-6204952a29973265b9c0d66fc67ffaf53c6a90ae4d75cdacf9d147676f5274c9.yml

api.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Types:
55
- <code><a href="./src/resources/shared.ts">ErrorObject</a></code>
66
- <code><a href="./src/resources/shared.ts">FunctionDefinition</a></code>
77
- <code><a href="./src/resources/shared.ts">FunctionParameters</a></code>
8+
- <code><a href="./src/resources/shared.ts">Metadata</a></code>
89
- <code><a href="./src/resources/shared.ts">ResponseFormatJSONObject</a></code>
910
- <code><a href="./src/resources/shared.ts">ResponseFormatJSONSchema</a></code>
1011
- <code><a href="./src/resources/shared.ts">ResponseFormatText</a></code>

src/index.ts

+1
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ export declare namespace OpenAI {
451451
export type ErrorObject = API.ErrorObject;
452452
export type FunctionDefinition = API.FunctionDefinition;
453453
export type FunctionParameters = API.FunctionParameters;
454+
export type Metadata = API.Metadata;
454455
export type ResponseFormatJSONObject = API.ResponseFormatJSONObject;
455456
export type ResponseFormatJSONSchema = API.ResponseFormatJSONSchema;
456457
export type ResponseFormatText = API.ResponseFormatText;

src/resources/audio/transcriptions.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ export interface TranscriptionCreateParams<
166166

167167
/**
168168
* The language of the input audio. Supplying the input language in
169-
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
170-
* improve accuracy and latency.
169+
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
170+
* format will improve accuracy and latency.
171171
*/
172172
language?: string;
173173

src/resources/batches.ts

+14-6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { APIResource } from '../resource';
44
import { isRequestOptions } from '../core';
55
import * as Core from '../core';
66
import * as BatchesAPI from './batches';
7+
import * as Shared from './shared';
78
import { CursorPage, type CursorPageParams } from '../pagination';
89

910
export class Batches extends APIResource {
@@ -138,11 +139,13 @@ export interface Batch {
138139

139140
/**
140141
* Set of 16 key-value pairs that can be attached to an object. This can be useful
141-
* for storing additional information about the object in a structured format. Keys
142-
* can be a maximum of 64 characters long and values can be a maximum of 512
143-
* characters long.
142+
* for storing additional information about the object in a structured format, and
143+
* querying for objects via API or the dashboard.
144+
*
145+
* Keys are strings with a maximum length of 64 characters. Values are strings with
146+
* a maximum length of 512 characters.
144147
*/
145-
metadata?: unknown | null;
148+
metadata?: Shared.Metadata | null;
146149

147150
/**
148151
* The ID of the file containing the outputs of successfully executed requests.
@@ -237,9 +240,14 @@ export interface BatchCreateParams {
237240
input_file_id: string;
238241

239242
/**
240-
* Optional custom metadata for the batch.
243+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
244+
* for storing additional information about the object in a structured format, and
245+
* querying for objects via API or the dashboard.
246+
*
247+
* Keys are strings with a maximum length of 64 characters. Values are strings with
248+
* a maximum length of 512 characters.
241249
*/
242-
metadata?: Record<string, string> | null;
250+
metadata?: Shared.Metadata | null;
243251
}
244252

245253
export interface BatchListParams extends CursorPageParams {}

src/resources/beta/assistants.ts

+25-17
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,13 @@ export interface Assistant {
111111

112112
/**
113113
* Set of 16 key-value pairs that can be attached to an object. This can be useful
114-
* for storing additional information about the object in a structured format. Keys
115-
* can be a maximum of 64 characters long and values can be a maximum of 512
116-
* characters long.
114+
* for storing additional information about the object in a structured format, and
115+
* querying for objects via API or the dashboard.
116+
*
117+
* Keys are strings with a maximum length of 64 characters. Values are strings with
118+
* a maximum length of 512 characters.
117119
*/
118-
metadata: unknown | null;
120+
metadata: Shared.Metadata | null;
119121

120122
/**
121123
* ID of the model to use. You can use the
@@ -1118,11 +1120,13 @@ export interface AssistantCreateParams {
11181120

11191121
/**
11201122
* Set of 16 key-value pairs that can be attached to an object. This can be useful
1121-
* for storing additional information about the object in a structured format. Keys
1122-
* can be a maximum of 64 characters long and values can be a maxium of 512
1123-
* characters long.
1123+
* for storing additional information about the object in a structured format, and
1124+
* querying for objects via API or the dashboard.
1125+
*
1126+
* Keys are strings with a maximum length of 64 characters. Values are strings with
1127+
* a maximum length of 512 characters.
11241128
*/
1125-
metadata?: unknown | null;
1129+
metadata?: Shared.Metadata | null;
11261130

11271131
/**
11281132
* The name of the assistant. The maximum length is 256 characters.
@@ -1242,12 +1246,14 @@ export namespace AssistantCreateParams {
12421246
file_ids?: Array<string>;
12431247

12441248
/**
1245-
* Set of 16 key-value pairs that can be attached to a vector store. This can be
1246-
* useful for storing additional information about the vector store in a structured
1247-
* format. Keys can be a maximum of 64 characters long and values can be a maximum
1248-
* of 512 characters long.
1249+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
1250+
* for storing additional information about the object in a structured format, and
1251+
* querying for objects via API or the dashboard.
1252+
*
1253+
* Keys are strings with a maximum length of 64 characters. Values are strings with
1254+
* a maximum length of 512 characters.
12491255
*/
1250-
metadata?: unknown;
1256+
metadata?: Shared.Metadata | null;
12511257
}
12521258
}
12531259
}
@@ -1267,11 +1273,13 @@ export interface AssistantUpdateParams {
12671273

12681274
/**
12691275
* Set of 16 key-value pairs that can be attached to an object. This can be useful
1270-
* for storing additional information about the object in a structured format. Keys
1271-
* can be a maximum of 64 characters long and values can be a maximum of 512
1272-
* characters long.
1276+
* for storing additional information about the object in a structured format, and
1277+
* querying for objects via API or the dashboard.
1278+
*
1279+
* Keys are strings with a maximum length of 64 characters. Values are strings with
1280+
* a maximum length of 512 characters.
12731281
*/
1274-
metadata?: unknown | null;
1282+
metadata?: Shared.Metadata | null;
12751283

12761284
/**
12771285
* ID of the model to use. You can use the

src/resources/beta/realtime/realtime.ts

+79-10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import { APIResource } from '../../../resource';
44
import * as RealtimeAPI from './realtime';
5+
import * as Shared from '../../shared';
56
import * as SessionsAPI from './sessions';
67
import {
78
Session as SessionsAPISession,
@@ -741,9 +742,38 @@ export interface RealtimeResponse {
741742
id?: string;
742743

743744
/**
744-
* Developer-provided string key-value pairs associated with this response.
745+
* Which conversation the response is added to, determined by the `conversation`
746+
* field in the `response.create` event. If `auto`, the response will be added to
747+
* the default conversation and the value of `conversation_id` will be an id like
748+
* `conv_1234`. If `none`, the response will not be added to any conversation and
749+
* the value of `conversation_id` will be `null`. If responses are being triggered
750+
* by server VAD, the response will be added to the default conversation, thus the
751+
* `conversation_id` will be an id like `conv_1234`.
745752
*/
746-
metadata?: unknown | null;
753+
conversation_id?: string;
754+
755+
/**
756+
* Maximum number of output tokens for a single assistant response, inclusive of
757+
* tool calls, that was used in this response.
758+
*/
759+
max_output_tokens?: number | 'inf';
760+
761+
/**
762+
* Set of 16 key-value pairs that can be attached to an object. This can be useful
763+
* for storing additional information about the object in a structured format, and
764+
* querying for objects via API or the dashboard.
765+
*
766+
* Keys are strings with a maximum length of 64 characters. Values are strings with
767+
* a maximum length of 512 characters.
768+
*/
769+
metadata?: Shared.Metadata | null;
770+
771+
/**
772+
* The set of modalities the model used to respond. If there are multiple
773+
* modalities, the model will pick one, for example if `modalities` is
774+
* `["text", "audio"]`, the model could be responding in either text or audio.
775+
*/
776+
modalities?: Array<'text' | 'audio'>;
747777

748778
/**
749779
* The object type, must be `realtime.response`.
@@ -755,6 +785,11 @@ export interface RealtimeResponse {
755785
*/
756786
output?: Array<ConversationItem>;
757787

788+
/**
789+
* The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
790+
*/
791+
output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
792+
758793
/**
759794
* The final status of the response (`completed`, `cancelled`, `failed`, or
760795
* `incomplete`).
@@ -766,13 +801,24 @@ export interface RealtimeResponse {
766801
*/
767802
status_details?: RealtimeResponseStatus;
768803

804+
/**
805+
* Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
806+
*/
807+
temperature?: number;
808+
769809
/**
770810
* Usage statistics for the Response, this will correspond to billing. A Realtime
771811
* API session will maintain a conversation context and append new Items to the
772812
* Conversation, thus output from previous turns (text and audio tokens) will
773813
* become the input for later turns.
774814
*/
775815
usage?: RealtimeResponseUsage;
816+
817+
/**
818+
* The voice the model used to respond. Current voice options are `alloy`, `ash`,
819+
* `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
820+
*/
821+
voice?: 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse';
776822
}
777823

778824
/**
@@ -1320,11 +1366,13 @@ export namespace ResponseCreateEvent {
13201366

13211367
/**
13221368
* Set of 16 key-value pairs that can be attached to an object. This can be useful
1323-
* for storing additional information about the object in a structured format. Keys
1324-
* can be a maximum of 64 characters long and values can be a maximum of 512
1325-
* characters long.
1369+
* for storing additional information about the object in a structured format, and
1370+
* querying for objects via API or the dashboard.
1371+
*
1372+
* Keys are strings with a maximum length of 64 characters. Values are strings with
1373+
* a maximum length of 512 characters.
13261374
*/
1327-
metadata?: unknown | null;
1375+
metadata?: Shared.Metadata | null;
13281376

13291377
/**
13301378
* The set of modalities the model can respond with. To disable audio, set this to
@@ -1716,8 +1764,11 @@ export namespace SessionUpdateEvent {
17161764
* Configuration for input audio transcription, defaults to off and can be set to
17171765
* `null` to turn off once on. Input audio transcription is not native to the
17181766
* model, since the model consumes audio directly. Transcription runs
1719-
* asynchronously through Whisper and should be treated as rough guidance rather
1720-
* than the representation understood by the model.
1767+
* asynchronously through
1768+
* [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
1769+
* and should be treated as rough guidance rather than the representation
1770+
* understood by the model. The client can optionally set the language and prompt
1771+
* for transcription, these fields will be passed to the Whisper API.
17211772
*/
17221773
input_audio_transcription?: Session.InputAudioTranscription;
17231774

@@ -1801,15 +1852,33 @@ export namespace SessionUpdateEvent {
18011852
* Configuration for input audio transcription, defaults to off and can be set to
18021853
* `null` to turn off once on. Input audio transcription is not native to the
18031854
* model, since the model consumes audio directly. Transcription runs
1804-
* asynchronously through Whisper and should be treated as rough guidance rather
1805-
* than the representation understood by the model.
1855+
* asynchronously through
1856+
* [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
1857+
* and should be treated as rough guidance rather than the representation
1858+
* understood by the model. The client can optionally set the language and prompt
1859+
* for transcription, these fields will be passed to the Whisper API.
18061860
*/
18071861
export interface InputAudioTranscription {
1862+
/**
1863+
* The language of the input audio. Supplying the input language in
1864+
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
1865+
* format will improve accuracy and latency.
1866+
*/
1867+
language?: string;
1868+
18081869
/**
18091870
* The model to use for transcription, `whisper-1` is the only currently supported
18101871
* model.
18111872
*/
18121873
model?: string;
1874+
1875+
/**
1876+
* An optional text to guide the model's style or continue a previous audio
1877+
* segment. The
1878+
* [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
1879+
* should match the audio language.
1880+
*/
1881+
prompt?: string;
18131882
}
18141883

18151884
export interface Tool {

src/resources/beta/realtime/sessions.ts

+28-7
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ export interface SessionCreateResponse {
203203
/**
204204
* Ephemeral key returned by the API.
205205
*/
206-
client_secret?: SessionCreateResponse.ClientSecret;
206+
client_secret: SessionCreateResponse.ClientSecret;
207207

208208
/**
209209
* The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
@@ -292,14 +292,14 @@ export namespace SessionCreateResponse {
292292
* Timestamp for when the token expires. Currently, all tokens expire after one
293293
* minute.
294294
*/
295-
expires_at?: number;
295+
expires_at: number;
296296

297297
/**
298298
* Ephemeral key usable in client environments to authenticate connections to the
299299
* Realtime API. Use this in client-side environments rather than a standard API
300300
* token, which should only be used server-side.
301301
*/
302-
value?: string;
302+
value: string;
303303
}
304304

305305
/**
@@ -385,8 +385,11 @@ export interface SessionCreateParams {
385385
* Configuration for input audio transcription, defaults to off and can be set to
386386
* `null` to turn off once on. Input audio transcription is not native to the
387387
* model, since the model consumes audio directly. Transcription runs
388-
* asynchronously through Whisper and should be treated as rough guidance rather
389-
* than the representation understood by the model.
388+
* asynchronously through
389+
* [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
390+
* and should be treated as rough guidance rather than the representation
391+
* understood by the model. The client can optionally set the language and prompt
392+
* for transcription, these fields will be passed to the Whisper API.
390393
*/
391394
input_audio_transcription?: SessionCreateParams.InputAudioTranscription;
392395

@@ -470,15 +473,33 @@ export namespace SessionCreateParams {
470473
* Configuration for input audio transcription, defaults to off and can be set to
471474
* `null` to turn off once on. Input audio transcription is not native to the
472475
* model, since the model consumes audio directly. Transcription runs
473-
* asynchronously through Whisper and should be treated as rough guidance rather
474-
* than the representation understood by the model.
476+
* asynchronously through
477+
* [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
478+
* and should be treated as rough guidance rather than the representation
479+
* understood by the model. The client can optionally set the language and prompt
480+
* for transcription, these fields will be passed to the Whisper API.
475481
*/
476482
export interface InputAudioTranscription {
483+
/**
484+
* The language of the input audio. Supplying the input language in
485+
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
486+
* format will improve accuracy and latency.
487+
*/
488+
language?: string;
489+
477490
/**
478491
* The model to use for transcription, `whisper-1` is the only currently supported
479492
* model.
480493
*/
481494
model?: string;
495+
496+
/**
497+
* An optional text to guide the model's style or continue a previous audio
498+
* segment. The
499+
* [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
500+
* should match the audio language.
501+
*/
502+
prompt?: string;
482503
}
483504

484505
export interface Tool {

0 commit comments

Comments (0)