 
 import { APIResource } from '../../../resource';
 import * as RealtimeAPI from './realtime';
+import * as Shared from '../../shared';
 import * as SessionsAPI from './sessions';
 import {
   Session as SessionsAPISession,
@@ -741,9 +742,38 @@ export interface RealtimeResponse {
   id?: string;
 
   /**
-   * Developer-provided string key-value pairs associated with this response.
+   * Which conversation the response is added to, determined by the `conversation`
+   * field in the `response.create` event. If `auto`, the response will be added to
+   * the default conversation and the value of `conversation_id` will be an id like
+   * `conv_1234`. If `none`, the response will not be added to any conversation and
+   * the value of `conversation_id` will be `null`. If responses are being triggered
+   * by server VAD, the response will be added to the default conversation, thus the
+   * `conversation_id` will be an id like `conv_1234`.
    */
-  metadata?: unknown | null;
+  conversation_id?: string;
+
+  /**
+   * Maximum number of output tokens for a single assistant response, inclusive of
+   * tool calls, that was used in this response.
+   */
+  max_output_tokens?: number | 'inf';
+
+  /**
+   * Set of 16 key-value pairs that can be attached to an object. This can be useful
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
+   */
+  metadata?: Shared.Metadata | null;
+
+  /**
+   * The set of modalities the model used to respond. If there are multiple
+   * modalities, the model will pick one, for example if `modalities` is
+   * `["text", "audio"]`, the model could be responding in either text or audio.
+   */
+  modalities?: Array<'text' | 'audio'>;
 
   /**
    * The object type, must be `realtime.response`.
@@ -755,6 +785,11 @@ export interface RealtimeResponse {
    */
   output?: Array<ConversationItem>;
 
+  /**
+   * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+   */
+  output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
+
   /**
    * The final status of the response (`completed`, `cancelled`, `failed`, or
    * `incomplete`).
@@ -766,13 +801,24 @@ export interface RealtimeResponse {
    */
   status_details?: RealtimeResponseStatus;
 
+  /**
+   * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+   */
+  temperature?: number;
+
   /**
    * Usage statistics for the Response, this will correspond to billing. A Realtime
    * API session will maintain a conversation context and append new Items to the
    * Conversation, thus output from previous turns (text and audio tokens) will
    * become the input for later turns.
    */
   usage?: RealtimeResponseUsage;
+
+  /**
+   * The voice the model used to respond. Current voice options are `alloy`, `ash`,
+   * `ballad`, `coral`, `echo` `sage`, `shimmer` and `verse`.
+   */
+  voice?: 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse';
 }
 
 /**
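Taken together, the hunks above add several read-only fields to `RealtimeResponse`. A minimal sketch of how a client might inspect them on a finished response; the import path and the idea of running this in a `response.done` handler are assumptions, not part of this diff:

// Sketch: reading the new RealtimeResponse fields after a response completes.
// Assumed: the import path below and that this is called from a `response.done` handler.
import type { RealtimeResponse } from 'openai/resources/beta/realtime/realtime';

function logResponseSummary(response: RealtimeResponse): void {
  // `conversation_id` is an id like `conv_1234`, or null/absent when the response
  // was created with `conversation: "none"`.
  console.log('conversation:', response.conversation_id ?? '(none)');

  // `max_output_tokens` is either a number or the literal string 'inf'.
  const tokenCap = response.max_output_tokens === 'inf' ? Infinity : response.max_output_tokens;
  console.log('token cap:', tokenCap, 'temperature:', response.temperature);

  // The response echoes back the modalities, voice, and audio format it used.
  console.log('modalities:', response.modalities, 'voice:', response.voice);
  console.log('audio format:', response.output_audio_format);

  // `metadata` is now `Shared.Metadata | null` (string keys and values) instead of `unknown`.
  for (const [key, value] of Object.entries(response.metadata ?? {})) {
    console.log(`metadata ${key}=${value}`);
  }
}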
@@ -1320,11 +1366,13 @@ export namespace ResponseCreateEvent {
 
     /**
      * Set of 16 key-value pairs that can be attached to an object. This can be useful
-     * for storing additional information about the object in a structured format. Keys
-     * can be a maximum of 64 characters long and values can be a maximum of 512
-     * characters long.
+     * for storing additional information about the object in a structured format, and
+     * querying for objects via API or the dashboard.
+     *
+     * Keys are strings with a maximum length of 64 characters. Values are strings with
+     * a maximum length of 512 characters.
      */
-    metadata?: unknown | null;
+    metadata?: Shared.Metadata | null;
 
     /**
      * The set of modalities the model can respond with. To disable audio, set this to
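With `metadata` now typed as `Shared.Metadata` on `ResponseCreateEvent.Response` as well, a `response.create` payload is restricted to string keys and string values. A rough sketch under that assumption; the event envelope, the field values, and the WebSocket send are illustrative, not from this diff:

// Sketch: building a `response.create` event with the newly typed metadata field.
const responseCreate = {
  type: 'response.create',
  response: {
    modalities: ['text', 'audio'],
    // Shared.Metadata: up to 16 pairs, keys <= 64 chars, values <= 512 chars.
    metadata: {
      customer_id: 'cus_123',   // illustrative values
      topic: 'order-status',
    },
  },
} as const;

// ws.send(JSON.stringify(responseCreate));  // hypothetical realtime WebSocket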
@@ -1716,8 +1764,11 @@ export namespace SessionUpdateEvent {
      * Configuration for input audio transcription, defaults to off and can be set to
      * `null` to turn off once on. Input audio transcription is not native to the
      * model, since the model consumes audio directly. Transcription runs
-     * asynchronously through Whisper and should be treated as rough guidance rather
-     * than the representation understood by the model.
+     * asynchronously through
+     * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+     * and should be treated as rough guidance rather than the representation
+     * understood by the model. The client can optionally set the language and prompt
+     * for transcription, these fields will be passed to the Whisper API.
      */
     input_audio_transcription?: Session.InputAudioTranscription;
 
@@ -1801,15 +1852,33 @@ export namespace SessionUpdateEvent {
      * Configuration for input audio transcription, defaults to off and can be set to
      * `null` to turn off once on. Input audio transcription is not native to the
      * model, since the model consumes audio directly. Transcription runs
-     * asynchronously through Whisper and should be treated as rough guidance rather
-     * than the representation understood by the model.
+     * asynchronously through
+     * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+     * and should be treated as rough guidance rather than the representation
+     * understood by the model. The client can optionally set the language and prompt
+     * for transcription, these fields will be passed to the Whisper API.
      */
     export interface InputAudioTranscription {
+      /**
+       * The language of the input audio. Supplying the input language in
+       * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+       * format will improve accuracy and latency.
+       */
+      language?: string;
+
       /**
        * The model to use for transcription, `whisper-1` is the only currently supported
        * model.
        */
       model?: string;
+
+      /**
+       * An optional text to guide the model's style or continue a previous audio
+       * segment. The
+       * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+       * should match the audio language.
+       */
+      prompt?: string;
     }
 
     export interface Tool {
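The new `language` and `prompt` fields let a client steer Whisper transcription when updating the session. A minimal sketch, assuming the usual `session.update` event envelope and a WebSocket transport (neither is shown in this diff):

// Sketch: enabling input audio transcription with the new language/prompt fields.
const sessionUpdate = {
  type: 'session.update',
  session: {
    input_audio_transcription: {
      model: 'whisper-1',
      language: 'en',                                  // ISO-639-1 code improves accuracy and latency
      prompt: 'Glossary: Realtime API, VAD, PCM16.',   // should match the audio language
    },
  },
} as const;

// ws.send(JSON.stringify(sessionUpdate));  // hypothetical realtime WebSocket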